prithivMLmods committed on
Commit 147f26b · verified · 1 Parent(s): 0e197cb

Update app.py

Files changed (1): app.py +18 -32
app.py CHANGED
@@ -24,6 +24,14 @@ from transformers import (
     TextIteratorStreamer,
 )
 
+# It's good practice to ensure compressed_tensors is installed when dealing with such models
+try:
+    from compressed_tensors import save_compressed, load_compressed, BitmaskConfig
+except ImportError:
+    print("compressed_tensors is not installed. Please install it using 'pip install compressed-tensors'")
+    sys.exit(1)
+
+
 from transformers.image_utils import load_image
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
@@ -122,37 +130,6 @@ if torch.cuda.is_available():
 
 print("Using device:", device)
 
-# CACHE_PATH = "./model_cache"
-# if not os.path.exists(CACHE_PATH):
-#     os.makedirs(CACHE_PATH)
-#
-# model_path_d_local = snapshot_download(
-#     repo_id='rednote-hilab/dots.ocr',
-#     local_dir=os.path.join(CACHE_PATH, 'dots.ocr'),
-#     max_workers=20,
-#     local_dir_use_symlinks=False
-# )
-#
-# config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
-#
-# if os.path.exists(config_file_path):
-#     with open(config_file_path, 'r') as f:
-#         input_code = f.read()
-#
-#     lines = input_code.splitlines()
-#     if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
-#         output_lines = []
-#         for line in lines:
-#             output_lines.append(line)
-#             if line.strip().startswith("class DotsVLProcessor"):
-#                 output_lines.append("    attributes = [\"image_processor\", \"tokenizer\"]")
-#
-#         with open(config_file_path, 'w') as f:
-#             f.write('\n'.join(output_lines))
-#         print("Patched configuration_dots.py successfully.")
-#
-#sys.path.append(model_path_d_local)
-
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -171,12 +148,15 @@ model_v = Qwen3VLForConditionalGeneration.from_pretrained(
 # Load Nanonets-OCR2-3B
 MODEL_ID_X = "prithivMLmods/Nanonets-OCR2-3B-AWQ-nvfp4"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+# The fix is to load the model in a supported dtype like float16.
+# The `compressed-tensors` library will handle the dequantization from Float8_e4m3fn.
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
-    torch_dtype="auto",
+    torch_dtype=torch.float16,  # Change "auto" to torch.float16
 ).to(device).eval()
 
+
 # Load Dots.OCR from the local, patched directory
 MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
@@ -305,4 +285,10 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 
 if __name__ == "__main__":
+    # To run this, you would need to have example images in an "examples" directory
+    # or upload your own images.
+    if not os.path.exists("examples"):
+        os.makedirs("examples")
+        print("Created 'examples' directory. Please add your example images there.")
+
     demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
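
The two functional changes in this commit are the guarded compressed_tensors import and the switch from torch_dtype="auto" to an explicit torch.float16 when loading the AWQ checkpoint. Below is a minimal sketch of exercising that loading path on its own, outside the Gradio app; it assumes the checkpoint follows the standard Qwen2.5-VL processor/chat-template API, and "example.png" is a hypothetical input image, not a file shipped with the Space.

# Minimal sketch (not part of the commit): load the AWQ checkpoint with an
# explicit float16 dtype, as app.py now does, and run one OCR-style generation.
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

MODEL_ID_X = "prithivMLmods/Nanonets-OCR2-3B-AWQ-nvfp4"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID_X,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # explicit dtype instead of "auto", as in this commit
).to(device).eval()

image = Image.open("example.png")  # hypothetical local image
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Extract the text from this page."},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(device)

with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=512)
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])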