Commit
·
d3f80ab
1
Parent(s):
c45522e
Update files/functions.py
Browse files
- files/functions.py +1 -39
files/functions.py
CHANGED
|
@@ -358,7 +358,7 @@ def pdf_to_images(uploaded_pdf):
|
|
| 358 |
except PdfReadError:
|
| 359 |
path_to_file = pdf_blank
|
| 360 |
filename = path_to_file.replace(examples_dir,"")
|
| 361 |
-
msg = "
|
| 362 |
images = [Image.open(image_blank)]
|
| 363 |
else:
|
| 364 |
try:
|
|
@@ -371,44 +371,6 @@ def pdf_to_images(uploaded_pdf):
|
|
| 371 |
|
| 372 |
return filename, msg, images
|
| 373 |
|
| 374 |
-
# get filename and images of PDF pages
|
| 375 |
-
def pdf_to_images(uploaded_pdf):
|
| 376 |
-
|
| 377 |
-
# file name of the uploaded PDF
|
| 378 |
-
filename = next(iter(uploaded_pdf))
|
| 379 |
-
|
| 380 |
-
try:
|
| 381 |
-
PdfReader(filename)
|
| 382 |
-
except PdfReadError:
|
| 383 |
-
print("Invalid PDF file.")
|
| 384 |
-
else:
|
| 385 |
-
try:
|
| 386 |
-
images = convert_from_path(str(filename))
|
| 387 |
-
num_imgs = len(images)
|
| 388 |
-
print(f'The PDF "{filename}"" was converted into {num_imgs} images.')
|
| 389 |
-
print("Now, you can extract data from theses images (text, bounding boxes...).")
|
| 390 |
-
except:
|
| 391 |
-
print(f"Error with the PDF {filename}:it was not converted into images.")
|
| 392 |
-
print()
|
| 393 |
-
else:
|
| 394 |
-
# display images
|
| 395 |
-
if num_imgs > 0:
|
| 396 |
-
|
| 397 |
-
import matplotlib.pyplot as plt
|
| 398 |
-
%matplotlib inline
|
| 399 |
-
|
| 400 |
-
plt.figure(figsize=(20,10))
|
| 401 |
-
columns = 5
|
| 402 |
-
for i, image in enumerate(images):
|
| 403 |
-
plt.subplot(num_imgs / columns + 1, columns, i + 1)
|
| 404 |
-
plt.xticks(color="white")
|
| 405 |
-
plt.yticks(color="white")
|
| 406 |
-
plt.tick_params(bottom = False)
|
| 407 |
-
plt.tick_params(left = False)
|
| 408 |
-
plt.imshow(image)
|
| 409 |
-
|
| 410 |
-
return filename, images
|
| 411 |
-
|
| 412 |
# Extraction of image data (text and bounding boxes)
|
| 413 |
def extraction_data_from_image(images):
|
| 414 |
|
|
|
|
| 358 |
except PdfReadError:
|
| 359 |
path_to_file = pdf_blank
|
| 360 |
filename = path_to_file.replace(examples_dir,"")
|
| 361 |
+
msg = "invalid PDF file."
|
| 362 |
images = [Image.open(image_blank)]
|
| 363 |
else:
|
| 364 |
try:
|
|
|
|
| 371 |
|
| 372 |
return filename, msg, images
|
| 373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
# Extraction of image data (text and bounding boxes)
|
| 375 |
def extraction_data_from_image(images):
|
| 376 |
|