Commit
·
e21c1b2
1
Parent(s):
6401e6f
Update files/functions.py
Browse files - files/functions.py +14 -3
files/functions.py
CHANGED
|
@@ -25,8 +25,7 @@ import pypdf
|
|
| 25 |
from pypdf import PdfReader
|
| 26 |
from pypdf.errors import PdfReadError
|
| 27 |
|
| 28 |
-
import
|
| 29 |
-
from pdf2image import convert_from_path
|
| 30 |
import langdetect
|
| 31 |
from langdetect import detect_langs
|
| 32 |
|
|
@@ -409,7 +408,19 @@ def pdf_to_images(uploaded_pdf):
|
|
| 409 |
images = [Image.open(image_blank)]
|
| 410 |
else:
|
| 411 |
try:
|
| 412 |
-
images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
num_imgs = len(images)
|
| 414 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
| 415 |
except:
|
|
|
|
| 25 |
from pypdf import PdfReader
|
| 26 |
from pypdf.errors import PdfReadError
|
| 27 |
|
| 28 |
+
import pypdfium2 as pdfium
|
|
|
|
| 29 |
import langdetect
|
| 30 |
from langdetect import detect_langs
|
| 31 |
|
|
|
|
| 408 |
images = [Image.open(image_blank)]
|
| 409 |
else:
|
| 410 |
try:
|
| 411 |
+
# images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
| 412 |
+
|
| 413 |
+
pdf = pdfium.PdfDocument(str(filename))
|
| 414 |
+
version = pdf.get_version() # get the PDF standard version
|
| 415 |
+
n_pages = len(pdf) # get the number of pages in the document
|
| 416 |
+
last_page = max_imgboxes
|
| 417 |
+
page_indices = [i for i in range(last_page)] # pages until last_page
|
| 418 |
+
images = list(pdf.render(
|
| 419 |
+
pdfium.PdfBitmap.to_pil,
|
| 420 |
+
page_indices = page_indices,
|
| 421 |
+
scale = 300/72, # 300dpi resolution
|
| 422 |
+
))
|
| 423 |
+
|
| 424 |
num_imgs = len(images)
|
| 425 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
| 426 |
except:
|