Spaces:
Running
Running
Commit
·
93fe459
1
Parent(s):
3554b07
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from huggingface_hub import Repository
|
|
| 12 |
from datetime import datetime
|
| 13 |
import scipy.ndimage.interpolation as inter
|
| 14 |
import easyocr
|
|
|
|
| 15 |
from datasets import load_dataset, Image, Features, Array3D
|
| 16 |
from PIL import Image
|
| 17 |
from paddleocr import PaddleOCR
|
|
@@ -29,8 +30,8 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
| 29 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
| 30 |
DATA_FILENAME = "ocr_data.csv"
|
| 31 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
| 32 |
-
DATA_FILENAME2 = "image"
|
| 33 |
-
DATA_FILE2 = os.path.join("ocr_data",DATA_FILENAME2)
|
| 34 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 35 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
| 36 |
print("is none?", HF_TOKEN is None)
|
|
@@ -153,16 +154,19 @@ def generate_ocr(Method,img):
|
|
| 153 |
new_data=img.reshape(img.shape)
|
| 154 |
imge = Image.fromarray(new_data.astype(np.uint8),'RGB')
|
| 155 |
add_csv = [Method,imge,text_output]
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
with open(DATA_FILE, "a") as f:
|
| 158 |
writer = csv.writer(f)
|
| 159 |
# write the data
|
| 160 |
writer.writerow(add_csv)
|
| 161 |
commit_url = repo.push_to_hub()
|
| 162 |
print(commit_url)
|
| 163 |
-
|
| 164 |
|
| 165 |
-
print("^^%%",Image.fromarray(img).save(DATA_FILE2 +"/"+ f"{image_id}.png"))
|
| 166 |
# with open(DATA_FILE2, "wt") as csvfile:
|
| 167 |
# writer = csv.writer(
|
| 168 |
# csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|
|
|
|
| 12 |
from datetime import datetime
|
| 13 |
import scipy.ndimage.interpolation as inter
|
| 14 |
import easyocr
|
| 15 |
+
import datasets
|
| 16 |
from datasets import load_dataset, Image, Features, Array3D
|
| 17 |
from PIL import Image
|
| 18 |
from paddleocr import PaddleOCR
|
|
|
|
| 30 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
| 31 |
DATA_FILENAME = "ocr_data.csv"
|
| 32 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
| 33 |
+
# DATA_FILENAME2 = "image"
|
| 34 |
+
# DATA_FILE2 = os.path.join("ocr_data",DATA_FILENAME2)
|
| 35 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 36 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
| 37 |
print("is none?", HF_TOKEN is None)
|
|
|
|
| 154 |
new_data=img.reshape(img.shape)
|
| 155 |
imge = Image.fromarray(new_data.astype(np.uint8),'RGB')
|
| 156 |
add_csv = [Method,imge,text_output]
|
| 157 |
+
|
| 158 |
+
feature = datasets.Image(decode=False)
|
| 159 |
+
new_image = {'image': feature.encode_example(imge)}
|
| 160 |
+
dataset['test'] = dataset['test'].add_item(new_image)
|
| 161 |
+
|
| 162 |
with open(DATA_FILE, "a") as f:
|
| 163 |
writer = csv.writer(f)
|
| 164 |
# write the data
|
| 165 |
writer.writerow(add_csv)
|
| 166 |
commit_url = repo.push_to_hub()
|
| 167 |
print(commit_url)
|
|
|
|
| 168 |
|
| 169 |
+
# print("^^%%",Image.fromarray(img).save(DATA_FILE2 +"/"+ f"{image_id}.png"))
|
| 170 |
# with open(DATA_FILE2, "wt") as csvfile:
|
| 171 |
# writer = csv.writer(
|
| 172 |
# csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|