|
# Legacy script (removed in this commit): read one page of a PDF aloud
# using pyttsx3 text-to-speech and PyPDF2.
import pyttsx3 # python text to speech version 3
import PyPDF2

# NOTE(review): file handle is never closed; acceptable for a throwaway script.
pdf = open("Andriy_Burkov.pdf", "rb")

# strict=False suppresses PdfReadError on mildly malformed PDFs.
pdfreader = PyPDF2.PdfFileReader(pdf, strict=False)

pages = pdfreader.numPages
# print(pages) you can used this to print number of pages


# Initialize the text-to-speech engine (default system voice).
dell = pyttsx3.init()

# Re-binds `pages` from a count to a single page object (page index 7, 0-based).
pages = pdfreader.getPage(7)

text = pages.extractText()

# dell.say("Hello How Can I help you")
dell.say(text)

# Blocks until all queued speech has been spoken.
dell.runAndWait()
| 1 | +import os |
| 2 | +import re |
| 3 | +import openai |
| 4 | +from dotenv import load_dotenv |
| 5 | +from langchain.document_loaders import PyPDFLoader |
| 6 | +from langchain.chat_models import ChatOpenAI |
| 7 | +from langchain.chains.summarize import load_summarize_chain |
| 8 | +from langchain.embeddings.openai import OpenAIEmbeddings |
| 9 | +from langchain.vectorstores import Chroma |
| 10 | +from langchain.text_splitter import CharacterTextSplitter |
| 11 | +from langchain.llms import OpenAI |
| 12 | +from langchain.chains import ConversationalRetrievalChain |
| 13 | +from langchain.memory import ConversationBufferMemory |
| 14 | + |
# Load environment configuration and wire the OpenAI API key into the SDK.
load_dotenv()
openai_api_key = os.getenv("API_KEY")
# Fix: the openai SDK reads `openai.api_key`; the previous
# `openai.openai_api_key = ...` set a nonexistent attribute, so the key
# was never actually applied.
openai.api_key = openai_api_key
# NOTE(review): LangChain's OpenAI wrappers read OPENAI_API_KEY from the
# environment — confirm the key is also exported under that name.

# Load the PDF and split it into per-page Documents.
loader = PyPDFLoader("promptEngineering.pdf")
docs = loader.load_and_split()

# Chunk the pages for embedding, build the vector store used for
# retrieval, and create the conversation memory shared by the QA chain.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(docs)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
| 27 | + |
def originalText(docs):
    """Return the plain text of the loaded PDF pages.

    Args:
        docs: Iterable of Document-like objects exposing a ``page_content``
            string attribute (e.g. LangChain Documents).

    Returns:
        The concatenated page texts, separated by newlines; "" for no docs.
    """
    # Fix: the previous version stringified the whole Document list and
    # regex-stripped the repr, which only handled single-digit page numbers,
    # deleted the literal word "Document" from real content, and left
    # brackets/quotes behind. Reading page_content directly is robust.
    return "\n".join(doc.page_content for doc in docs)
| 33 | + |
# Show the user the raw text that the QA chain will be answering over.
print("Text in pdf", (text := originalText(docs)))
| 36 | + |
def summarizeText():
    """Summarize the loaded PDF pages with a single-pass ("stuff") chain.

    Returns:
        The model-generated summary as a string.
    """
    # 16k-context model so all pages fit into one "stuff" prompt.
    model = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
    summarizer = load_summarize_chain(model, chain_type="stuff")
    return summarizer.run(docs)
| 41 | + |
# Print a one-shot summary of the whole document before entering the REPL.
print("Summary of text in pdf", (summary := summarizeText()))
| 44 | + |
def QA(query):
    """Answer ``query`` using retrieval over the PDF's vector store.

    Args:
        query: The user's question about the PDF content.

    Returns:
        The model's answer as a string.
    """
    # Memory is shared across calls, so the chain stays conversational.
    qa = ConversationalRetrievalChain.from_llm(
        OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory
    )
    # Fix: the previous version clobbered `query` with a hard-coded question,
    # so every user input got the same answer; it then parsed the answer out
    # of str(chat_history[1]), which is repr-dependent, leaves a trailing
    # quote, and points at the wrong message once memory accumulates.
    # The chain's result dict exposes the response directly under "answer".
    result = qa({"question": query})
    return result["answer"]
| 52 | + |
# Simple REPL: answer questions about the PDF until the user enters -1.
print("INSTRUCTIONS:")
print("Enter the question you want to ask from pdf text OR press \"-1\" to STOP")
while True:
    user_input = input("Enter your question: ")
    if user_input == "-1":
        break
    print(QA(user_input))
0 commit comments