Skip to content
This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit c25d044

Browse files
Merge pull request #370 from parth-verma7/main
AI PDF Reader
2 parents 702ced7 + 8296c4a commit c25d044

2 files changed

Lines changed: 60 additions & 21 deletions

File tree

projects/PDF_Reader/PDF_Reader.py

Lines changed: 60 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,60 @@
1-
import pyttsx3 # python text to speech version 3
2-
import PyPDF2
3-
4-
# Read one page of a PDF aloud with the pyttsx3 text-to-speech engine.
# The file is opened in a `with` block so the handle is always closed,
# even if parsing or text extraction raises.
with open("Andriy_Burkov.pdf", "rb") as pdf:
    pdfreader = PyPDF2.PdfFileReader(pdf, strict=False)

    # Total number of pages in the document.
    pages = pdfreader.numPages
    # print(pages)  # uncomment to print the number of pages

    # Fetch the page to read; kept in its own name so the page count above
    # is not overwritten.
    page = pdfreader.getPage(7)

    # Extract the text before the file is closed.
    text = page.extractText()

dell = pyttsx3.init()

# dell.say("Hello How Can I help you")
dell.say(text)

# Block until the queued speech has been spoken.
dell.runAndWait()
1+
import os
2+
import re
3+
import openai
4+
from dotenv import load_dotenv
5+
from langchain.document_loaders import PyPDFLoader
6+
from langchain.chat_models import ChatOpenAI
7+
from langchain.chains.summarize import load_summarize_chain
8+
from langchain.embeddings.openai import OpenAIEmbeddings
9+
from langchain.vectorstores import Chroma
10+
from langchain.text_splitter import CharacterTextSplitter
11+
from langchain.llms import OpenAI
12+
from langchain.chains import ConversationalRetrievalChain
13+
from langchain.memory import ConversationBufferMemory
14+
15+
# Load variables from a local .env file into the process environment.
load_dotenv()

# The key is read from API_KEY (the name this project uses), falling back to
# the conventional OPENAI_API_KEY when API_KEY is not set.
openai_api_key = os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
if openai_api_key:
    # The openai module takes its key from `openai.api_key`; an attribute
    # named `openai.openai_api_key` is never consulted by the library.
    openai.api_key = openai_api_key
    # The LangChain OpenAI wrappers used in this file are constructed without
    # an explicit key, so expose it under the environment variable they look
    # up, without overriding a value that is already set.
    os.environ.setdefault("OPENAI_API_KEY", openai_api_key)

# Load the PDF and split it into documents.
loader = PyPDFLoader("promptEngineering.pdf")
docs = loader.load_and_split()

# Re-split into chunks of at most 1000 characters with no overlap, embed the
# chunks, and index them in a Chroma vector store for retrieval.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(docs)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

# Conversation memory shared by every question asked in this session.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
27+
28+
def originalText(docs):
    """Return the text of the loaded PDF as a single string.

    Args:
        docs: Iterable of loaded document objects. Each item is expected to
            expose its text as a ``page_content`` attribute; an item without
            that attribute contributes ``str(item)`` instead.

    Returns:
        The page contents concatenated in order, with newline characters
        removed. An empty or ``None`` input yields an empty string.
    """
    if not docs:
        return ""

    # Read the text directly instead of scrubbing ``str(docs)`` with a regex:
    # the repr-based approach leaves quotes, brackets and the source path in
    # the output and only recognises single-digit page numbers.
    parts = []
    for doc in docs:
        content = getattr(doc, "page_content", None)
        parts.append(str(doc) if content is None else str(content))

    # Newlines are dropped so the result stays a single line of text.
    return "".join(parts).replace("\n", "")
33+
34+
# Show the text recovered from the PDF.
text = originalText(docs)
print("Text in pdf", text)
36+
37+
def summarizeText():
    """Summarize the loaded PDF with a chat model.

    Builds a "stuff" summarization chain around a ``gpt-3.5-turbo-16k`` chat
    model at temperature 0 and runs it on the module-level ``docs``.

    Returns:
        The value produced by running the chain on ``docs``.
    """
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0)
    summarizer = load_summarize_chain(chat_model, chain_type="stuff")
    summary_text = summarizer.run(docs)
    return summary_text
41+
42+
# Show a model-generated summary of the PDF.
summary = summarizeText()
print("Summary of text in pdf", summary)
44+
45+
def QA(query):
    """Answer a question about the PDF using retrieval over the vector store.

    The chain is built on the module-level ``vectorstore`` and shares the
    module-level ``memory``, so earlier questions and answers in the session
    are available as chat history.

    Args:
        query: The user's question as a string.

    Returns:
        The model's answer to ``query`` as a string.
    """
    qa = ConversationalRetrievalChain.from_llm(
        OpenAI(temperature=0),
        vectorstore.as_retriever(),
        memory=memory,
    )

    # Ask the caller's question as given; it must not be replaced by a
    # hard-coded one, or every call answers the same thing.
    result = qa({"question": query})

    # Read the chain's "answer" output directly. Parsing the string form of
    # chat_history[1] would always yield the first answer of the conversation
    # (the shared memory keeps growing) and breaks when the answer contains a
    # quote character.
    return result["answer"]
52+
53+
# Interactive prompt: keep answering questions until the user enters "-1".
print("INSTRUCTIONS:")
print("Enter the question you want to ask from pdf text OR press \"-1\" to STOP")
while True:
    user_input = input("Enter your question: ")
    if user_input == "-1":
        break
    print(QA(user_input))
3.07 KB
Binary file not shown.

0 commit comments

Comments
 (0)