Spaces:
Running
Running
Commit
·
98c7b0e
1
Parent(s):
0cc7d4a
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,8 +29,8 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
| 29 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
| 30 |
DATA_FILENAME = "ocr_data.csv"
|
| 31 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
| 32 |
-
|
| 33 |
-
|
| 34 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 35 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
| 36 |
print("is none?", HF_TOKEN is None)
|
|
@@ -117,6 +117,23 @@ def ocr_with_easy(img):
|
|
| 117 |
bounds = reader.readtext('image.png',paragraph="False",detail = 0)
|
| 118 |
bounds = ''.join(bounds)
|
| 119 |
return bounds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
"""
|
| 121 |
Generate OCR
|
| 122 |
"""
|
|
@@ -124,7 +141,7 @@ def generate_ocr(Method,img):
|
|
| 124 |
try:
|
| 125 |
text_output = ''
|
| 126 |
add_csv = []
|
| 127 |
-
|
| 128 |
print("Method___________________",Method)
|
| 129 |
if Method == 'EasyOCR':
|
| 130 |
text_output = ocr_with_easy(img)
|
|
@@ -143,6 +160,15 @@ def generate_ocr(Method,img):
|
|
| 143 |
writer.writerow(add_csv)
|
| 144 |
commit_url = repo.push_to_hub()
|
| 145 |
print(commit_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
# try:
|
| 147 |
# dataset = load_dataset("pragnakalp/OCR-img-to-text", streaming=True)
|
| 148 |
# print(dataset.features)
|
|
|
|
| 29 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
| 30 |
DATA_FILENAME = "ocr_data.csv"
|
| 31 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
| 32 |
+
DATA_FILENAME2 = "ocr_image.csv"
|
| 33 |
+
DATA_FILE2 = os.path.join("ocr_image", DATA_FILENAME2)
|
| 34 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 35 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
| 36 |
print("is none?", HF_TOKEN is None)
|
|
|
|
| 117 |
bounds = reader.readtext('image.png',paragraph="False",detail = 0)
|
| 118 |
bounds = ''.join(bounds)
|
| 119 |
return bounds
|
| 120 |
+
|
| 121 |
+
# def store_single_disk(image, image_id, label):
|
| 122 |
+
# """ Stores a single image as a .png file on disk.
|
| 123 |
+
# Parameters:
|
| 124 |
+
# ---------------
|
| 125 |
+
# image image array, (32, 32, 3) to be stored
|
| 126 |
+
# image_id integer unique ID for image
|
| 127 |
+
# label image label
|
| 128 |
+
# """
|
| 129 |
+
# Image.fromarray(image).save(disk_dir / f"{image_id}.png")
|
| 130 |
+
|
| 131 |
+
# with open(disk_dir / f"{image_id}.csv", "wt") as csvfile:
|
| 132 |
+
# writer = csv.writer(
|
| 133 |
+
# csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|
| 134 |
+
# )
|
| 135 |
+
# writer.writerow([label])
|
| 136 |
+
|
| 137 |
"""
|
| 138 |
Generate OCR
|
| 139 |
"""
|
|
|
|
| 141 |
try:
|
| 142 |
text_output = ''
|
| 143 |
add_csv = []
|
| 144 |
+
image_id = 1
|
| 145 |
print("Method___________________",Method)
|
| 146 |
if Method == 'EasyOCR':
|
| 147 |
text_output = ocr_with_easy(img)
|
|
|
|
| 160 |
writer.writerow(add_csv)
|
| 161 |
commit_url = repo.push_to_hub()
|
| 162 |
print(commit_url)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
Image.fromarray(image).save(DATA_FILE2 / f"{image_id}.png")
|
| 166 |
+
with open(DATA_FILE2, "wt") as csvfile:
|
| 167 |
+
writer = csv.writer(
|
| 168 |
+
csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|
| 169 |
+
)
|
| 170 |
+
writer.writerow([0])
|
| 171 |
+
|
| 172 |
# try:
|
| 173 |
# dataset = load_dataset("pragnakalp/OCR-img-to-text", streaming=True)
|
| 174 |
# print(dataset.features)
|