|
| 1 | +import sys |
| 2 | +import os |
| 3 | +from PyPDF2 import PdfReader |
| 4 | +from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog, QPushButton, QTextEdit, QVBoxLayout, QWidget |
| 5 | + |
| 6 | + |
class PDFTextExtractorApp(QMainWindow):
    """Main window that lets the user pick a PDF file and view its text.

    The window contains a read-only text area and two buttons: one opens a
    file dialog to choose a PDF, the other extracts the text of the chosen
    file with PyPDF2 and displays it in the text area.
    """

    def __init__(self):
        super().__init__()

        # Path of the PDF chosen through the file dialog; stays None until
        # the user picks a file. Defined before initUI() so the button slots
        # can always read it without probing with hasattr().
        self.pdf_path = None

        self.initUI()

    def initUI(self):
        """Build the widgets, wire the button signals and lay out the window."""
        self.setGeometry(100, 100, 800, 600)
        self.setWindowTitle('PDF Text Extractor')

        self.text_edit = QTextEdit(self)
        self.text_edit.setReadOnly(True)

        self.btn_open_pdf = QPushButton('Open PDF File', self)
        self.btn_open_pdf.clicked.connect(self.openPDF)

        self.btn_extract_text = QPushButton('Extract Text', self)
        self.btn_extract_text.clicked.connect(self.extractText)

        layout = QVBoxLayout()
        layout.addWidget(self.text_edit)
        layout.addWidget(self.btn_open_pdf)
        layout.addWidget(self.btn_extract_text)

        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)

    def openPDF(self):
        """Ask the user for a PDF file and remember its path.

        If the dialog is cancelled (empty path returned), the previously
        selected path, if any, is left unchanged.
        """
        options = QFileDialog.Options()
        options |= QFileDialog.ReadOnly
        pdf_file, _ = QFileDialog.getOpenFileName(
            self, 'Open PDF File', '', 'PDF Files (*.pdf);;All Files (*)',
            options=options)
        if pdf_file:
            self.pdf_path = pdf_file

    def extractText(self):
        """Extract the text of the selected PDF and show it in the text area.

        Shows 'No PDF file selected.' when no file has been chosen yet, and
        an error message when the file cannot be read or parsed.
        """
        if not self.pdf_path:
            self.text_edit.setPlainText('No PDF file selected.')
            return

        try:
            text = self.extractTextFromPDF(self.pdf_path)
        except Exception as exc:
            # This slot is the UI boundary: an exception escaping it would
            # take the whole application down rather than inform the user,
            # so any read/parse failure is reported in the text area instead.
            self.text_edit.setPlainText(
                f'Could not extract text from {self.pdf_path}: {exc}')
            return

        self.text_edit.setPlainText(text)

    def extractTextFromPDF(self, pdf_path):
        """Return the concatenated text of every page of the PDF at pdf_path.

        Page texts are joined without any separator. Errors raised while
        opening or parsing the file propagate to the caller.
        """
        pdf_reader = PdfReader(pdf_path)
        # `or ''` is defensive: a page that yields no text (None or empty)
        # contributes nothing instead of breaking the concatenation.
        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
| 58 | + |
| 59 | + |
def main():
    """Create the Qt application, show the extractor window and run the loop.

    The process exits with the status code returned by the Qt event loop.
    """
    qt_app = QApplication(sys.argv)

    main_window = PDFTextExtractorApp()
    main_window.show()

    # exec_() blocks until the event loop ends; its return value becomes
    # the process exit status.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
| 65 | + |
| 66 | + |
# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
0 commit comments