prithivMLmods committed on
Commit 147f26b · verified · 1 Parent(s): 0e197cb

Update app.py

Files changed (1): app.py +18 -32
app.py CHANGED
@@ -24,6 +24,14 @@ from transformers import (
     TextIteratorStreamer,
 )
 
+# It's good practice to ensure compressed_tensors is installed when dealing with such models
+try:
+    from compressed_tensors import save_compressed, load_compressed, BitmaskConfig
+except ImportError:
+    print("compressed_tensors is not installed. Please install it using 'pip install compressed-tensors'")
+    sys.exit(1)
+
+
 from transformers.image_utils import load_image
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
@@ -122,37 +130,6 @@ if torch.cuda.is_available():
 
 print("Using device:", device)
 
-# CACHE_PATH = "./model_cache"
-# if not os.path.exists(CACHE_PATH):
-#     os.makedirs(CACHE_PATH)
-#
-# model_path_d_local = snapshot_download(
-#     repo_id='rednote-hilab/dots.ocr',
-#     local_dir=os.path.join(CACHE_PATH, 'dots.ocr'),
-#     max_workers=20,
-#     local_dir_use_symlinks=False
-# )
-#
-# config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
-#
-# if os.path.exists(config_file_path):
-#     with open(config_file_path, 'r') as f:
-#         input_code = f.read()
-#
-#     lines = input_code.splitlines()
-#     if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
-#         output_lines = []
-#         for line in lines:
-#             output_lines.append(line)
-#             if line.strip().startswith("class DotsVLProcessor"):
-#                 output_lines.append("    attributes = [\"image_processor\", \"tokenizer\"]")
-#
-#         with open(config_file_path, 'w') as f:
-#             f.write('\n'.join(output_lines))
-#         print("Patched configuration_dots.py successfully.")
-#
-#sys.path.append(model_path_d_local)
-
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -171,12 +148,15 @@ model_v = Qwen3VLForConditionalGeneration.from_pretrained(
 # Load Nanonets-OCR2-3B
 MODEL_ID_X = "prithivMLmods/Nanonets-OCR2-3B-AWQ-nvfp4"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+# The fix is to load the model in a supported dtype like float16.
+# The `compressed-tensors` library will handle the dequantization from Float8_e4m3fn.
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
-    torch_dtype="auto",
+    torch_dtype=torch.float16,  # Change "auto" to torch.float16
 ).to(device).eval()
 
+
 # Load Dots.OCR from the local, patched directory
 MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
@@ -305,4 +285,10 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 
 if __name__ == "__main__":
+    # To run this, you would need to have example images in an "examples" directory
+    # or upload your own images.
+    if not os.path.exists("examples"):
+        os.makedirs("examples")
+        print("Created 'examples' directory. Please add your example images there.")
+
     demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
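
The two functional changes in this commit are the guarded compressed_tensors import and the switch from torch_dtype="auto" to an explicit torch.float16 when loading the AWQ checkpoint. Below is a minimal sketch of exercising that loading path on its own, outside the Gradio app; it assumes the checkpoint follows the standard Qwen2.5-VL processor/chat-template API, and "example.png" is a hypothetical input image, not a file shipped with the Space.

# Minimal sketch (not part of the commit): load the AWQ checkpoint with an
# explicit float16 dtype, as app.py now does, and run one OCR-style generation.
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

MODEL_ID_X = "prithivMLmods/Nanonets-OCR2-3B-AWQ-nvfp4"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID_X,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # explicit dtype instead of "auto", as in this commit
).to(device).eval()

image = Image.open("example.png")  # hypothetical local image
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Extract the text from this page."},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(device)

with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=512)
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])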