Update app.py
Browse files
app.py
CHANGED
|
@@ -34,6 +34,8 @@ from PyPDF2 import PdfFileReader
|
|
| 34 |
from pdf2image import convert_from_bytes
|
| 35 |
import pdfplumber
|
| 36 |
from line_cor import mark_region
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# NLP Pkgs
|
| 39 |
from textblob import TextBlob
|
|
@@ -46,8 +48,9 @@ import pytesseract
|
|
| 46 |
|
| 47 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
| 48 |
from PIL import Image
|
|
|
|
| 49 |
def read_pdf(file):
|
| 50 |
-
images=pdf2image.convert_from_bytes(file.read())
|
| 51 |
#pdfReader = PdfFileReader(file)
|
| 52 |
#count = pdfReader.numPages
|
| 53 |
all_page_text = ""
|
|
|
|
| 34 |
from pdf2image import convert_from_bytes
|
| 35 |
import pdfplumber
|
| 36 |
from line_cor import mark_region
|
| 37 |
+
import pdf2image
|
| 38 |
+
|
| 39 |
|
| 40 |
# NLP Pkgs
|
| 41 |
from textblob import TextBlob
|
|
|
|
| 48 |
|
| 49 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
| 50 |
from PIL import Image
|
| 51 |
+
@st.experimental_singleton
|
| 52 |
def read_pdf(file):
|
| 53 |
+
images=pdf2image.convert_from_bytes(file.read(),"rb")
|
| 54 |
#pdfReader = PdfFileReader(file)
|
| 55 |
#count = pdfReader.numPages
|
| 56 |
all_page_text = ""
|