Update app.py
Browse files
app.py
CHANGED
|
@@ -45,9 +45,9 @@ import pytesseract
|
|
| 45 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
| 46 |
from PIL import Image
|
| 47 |
def read_pdf(file):
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
for i in range(count):
|
| 52 |
page = pdfReader.getPage(i)
|
| 53 |
image_name = "Page_" + str(i) + ".jpg"
|
|
|
|
| 45 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
| 46 |
from PIL import Image
|
| 47 |
def read_pdf(file):
|
| 48 |
+
pdfReader = PdfFileReader(file)
|
| 49 |
+
count = pdfReader.numPages
|
| 50 |
+
all_page_text = ""
|
| 51 |
for i in range(count):
|
| 52 |
page = pdfReader.getPage(i)
|
| 53 |
image_name = "Page_" + str(i) + ".jpg"
|