prithivMLmods committed on
Commit
3c839f6
·
verified ·
1 Parent(s): 4144e8c

update app

Browse files
Files changed (1) hide show
  1. app.py +49 -80
app.py CHANGED
@@ -11,6 +11,7 @@ import spaces
11
  import torch
12
  import numpy as np
13
  from PIL import Image
 
14
 
15
  from transformers import (
16
  Qwen2VLForConditionalGeneration,
@@ -25,35 +26,28 @@ from gradio.themes.utils import colors, fonts, sizes
25
 
26
  # --- Theme and CSS Definition ---
27
 
28
- # Define the Thistle color palette
29
- colors.thistle = colors.Color(
30
- name="thistle",
31
- c50="#F9F5F9",
32
- c100="#F0E8F1",
33
- c200="#E7DBE8",
34
- c300="#DECEE0",
35
- c400="#D2BFD8",
36
- c500="#D8BFD8", # Thistle base color
37
- c600="#B59CB7",
38
- c700="#927996",
39
- c800="#6F5675",
40
- c900="#4C3454",
41
- c950="#291233",
42
  )
43
 
44
- colors.red_gray = colors.Color(
45
- name="red_gray",
46
- c50="#f7eded", c100="#f5dcdc", c200="#efb4b4", c300="#e78f8f",
47
- c400="#d96a6a", c500="#c65353", c600="#b24444", c700="#8f3434",
48
- c800="#732d2d", c900="#5f2626", c950="#4d2020",
49
- )
50
-
51
- class ThistleTheme(Soft):
52
  def __init__(
53
  self,
54
  *,
55
  primary_hue: colors.Color | str = colors.gray,
56
- secondary_hue: colors.Color | str = colors.thistle, # Use the new color
57
  neutral_hue: colors.Color | str = colors.slate,
58
  text_size: sizes.Size | str = sizes.text_lg,
59
  font: fonts.Font | str | Iterable[fonts.Font | str] = (
@@ -76,19 +70,19 @@ class ThistleTheme(Soft):
76
  background_fill_primary_dark="*primary_900",
77
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
78
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
79
- button_primary_text_color="black",
80
  button_primary_text_color_hover="white",
81
- button_primary_background_fill="linear-gradient(90deg, *secondary_400, *secondary_500)",
82
- button_primary_background_fill_hover="linear-gradient(90deg, *secondary_500, *secondary_600)",
83
- button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
84
- button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
85
  button_secondary_text_color="black",
86
  button_secondary_text_color_hover="white",
87
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
88
  button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
89
  button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
90
  button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
91
- slider_color="*secondary_400",
92
  slider_color_dark="*secondary_600",
93
  block_title_text_weight="600",
94
  block_border_width="3px",
@@ -100,7 +94,7 @@ class ThistleTheme(Soft):
100
  )
101
 
102
  # Instantiate the new theme
103
- thistle_theme = ThistleTheme()
104
 
105
  css = """
106
  #main-title h1 {
@@ -109,56 +103,12 @@ css = """
109
  #output-title h2 {
110
  font-size: 2.1em !important;
111
  }
112
- :root {
113
- --color-grey-50: #f9fafb;
114
- --banner-background: var(--secondary-400);
115
- --banner-text-color: var(--primary-100);
116
- --banner-background-dark: var(--secondary-800);
117
- --banner-text-color-dark: var(--primary-100);
118
- --banner-chrome-height: calc(16px + 43px);
119
- --chat-chrome-height-wide-no-banner: 320px;
120
- --chat-chrome-height-narrow-no-banner: 450px;
121
- --chat-chrome-height-wide: calc(var(--chat-chrome-height-wide-no-banner) + var(--banner-chrome-height));
122
- --chat-chrome-height-narrow: calc(var(--chat-chrome-height-narrow-no-banner) + var(--banner-chrome-height));
123
- }
124
- .banner-message { background-color: var(--banner-background); padding: 5px; margin: 0; border-radius: 5px; border: none; }
125
- .banner-message-text { font-size: 13px; font-weight: bolder; color: var(--banner-text-color) !important; }
126
- body.dark .banner-message { background-color: var(--banner-background-dark) !important; }
127
- body.dark .gradio-container .contain .banner-message .banner-message-text { color: var(--banner-text-color-dark) !important; }
128
- .toast-body { background-color: var(--color-grey-50); }
129
- .html-container:has(.css-styles) { padding: 0; margin: 0; }
130
- .css-styles { height: 0; }
131
- .model-message { text-align: end; }
132
- .model-dropdown-container { display: flex; align-items: center; gap: 10px; padding: 0; }
133
- .user-input-container .multimodal-textbox{ border: none !important; }
134
- .control-button { height: 51px; }
135
- button.cancel { border: var(--button-border-width) solid var(--button-cancel-border-color); background: var(--button-cancel-background-fill); color: var(--button-cancel-text-color); box-shadow: var(--button-cancel-shadow); }
136
- button.cancel:hover, .cancel[disabled] { background: var(--button-cancel-background-fill-hover); color: var(--button-cancel-text-color-hover); }
137
- .opt-out-message { top: 8px; }
138
- .opt-out-message .html-container, .opt-out-checkbox label { font-size: 14px !important; padding: 0 !important; margin: 0 !important; color: var(--neutral-400) !important; }
139
- div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; max-height: 900px !important; }
140
- div.no-padding { padding: 0 !important; }
141
- @media (max-width: 1280px) { div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; } }
142
- @media (max-width: 1024px) {
143
- .responsive-row { flex-direction: column; }
144
- .model-message { text-align: start; font-size: 10px !important; }
145
- .model-dropdown-container { flex-direction: column; align-items: flex-start; }
146
- div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-narrow)) !important; }
147
- }
148
- @media (max-width: 400px) {
149
- .responsive-row { flex-direction: column; }
150
- .model-message { text-align: start; font-size: 10px !important; }
151
- .model-dropdown-container { flex-direction: column; align-items: flex-start; }
152
- div.block.chatbot { max-height: 360px !important; }
153
- }
154
- @media (max-height: 932px) { .chatbot { max-height: 500px !important; } }
155
- @media (max-height: 1280px) { div.block.chatbot { max-height: 800px !important; } }
156
  """
157
 
158
  # Constants for text generation
 
159
  DEFAULT_MAX_NEW_TOKENS = 1024
160
- # Increased max_length to accommodate more complex inputs, especially with multiple images
161
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
162
 
163
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
164
 
@@ -221,7 +171,9 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
221
 
222
 
223
  @spaces.GPU
224
- def generate_image(model_name: str, text: str, image: Image.Image):
 
 
225
  """
226
  Generates responses using the selected model for image input.
227
  Yields raw text and Markdown-formatted text.
@@ -266,7 +218,16 @@ def generate_image(model_name: str, text: str, image: Image.Image):
266
  ).to(device)
267
 
268
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
269
- generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": DEFAULT_MAX_NEW_TOKENS}
 
 
 
 
 
 
 
 
 
270
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
271
  thread.start()
272
  buffer = ""
@@ -286,12 +247,20 @@ image_examples = [
286
 
287
 
288
  # Create the Gradio Interface
289
- with gr.Blocks(css=css, theme=thistle_theme) as demo:
290
  gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
291
  with gr.Row():
292
  with gr.Column(scale=2):
293
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
294
  image_upload = gr.Image(type="pil", label="Upload Image", height=290)
 
 
 
 
 
 
 
 
295
  image_submit = gr.Button("Submit", variant="primary")
296
  gr.Examples(
297
  examples=image_examples,
@@ -312,7 +281,7 @@ with gr.Blocks(css=css, theme=thistle_theme) as demo:
312
 
313
  image_submit.click(
314
  fn=generate_image,
315
- inputs=[model_choice, image_query, image_upload],
316
  outputs=[output, markdown_output]
317
  )
318
 
 
11
  import torch
12
  import numpy as np
13
  from PIL import Image
14
+ import cv2
15
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
 
26
 
27
  # --- Theme and CSS Definition ---
28
 
29
+ # Define the SteelBlue color palette
30
+ colors.steel_blue = colors.Color(
31
+ name="steel_blue",
32
+ c50="#EBF3F8",
33
+ c100="#D3E5F0",
34
+ c200="#A8CCE1",
35
+ c300="#7DB3D2",
36
+ c400="#529AC3",
37
+ c500="#4682B4", # SteelBlue base color
38
+ c600="#3E72A0",
39
+ c700="#36638C",
40
+ c800="#2E5378",
41
+ c900="#264364",
42
+ c950="#1E3450",
43
  )
44
 
45
+ class SteelBlueTheme(Soft):
 
 
 
 
 
 
 
46
  def __init__(
47
  self,
48
  *,
49
  primary_hue: colors.Color | str = colors.gray,
50
+ secondary_hue: colors.Color | str = colors.steel_blue,
51
  neutral_hue: colors.Color | str = colors.slate,
52
  text_size: sizes.Size | str = sizes.text_lg,
53
  font: fonts.Font | str | Iterable[fonts.Font | str] = (
 
70
  background_fill_primary_dark="*primary_900",
71
  body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
72
  body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
73
+ button_primary_text_color="white",
74
  button_primary_text_color_hover="white",
75
+ button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
76
+ button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
77
+ button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
78
+ button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
79
  button_secondary_text_color="black",
80
  button_secondary_text_color_hover="white",
81
  button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
82
  button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
83
  button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
84
  button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
85
+ slider_color="*secondary_500",
86
  slider_color_dark="*secondary_600",
87
  block_title_text_weight="600",
88
  block_border_width="3px",
 
94
  )
95
 
96
  # Instantiate the new theme
97
+ steel_blue_theme = SteelBlueTheme()
98
 
99
  css = """
100
  #main-title h1 {
 
103
  #output-title h2 {
104
  font-size: 2.1em !important;
105
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  """
107
 
108
  # Constants for text generation
109
+ MAX_MAX_NEW_TOKENS = 4096
110
  DEFAULT_MAX_NEW_TOKENS = 1024
111
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
112
 
113
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
114
 
 
171
 
172
 
173
  @spaces.GPU
174
+ def generate_image(model_name: str, text: str, image: Image.Image,
175
+ max_new_tokens: int, temperature: float, top_p: float,
176
+ top_k: int, repetition_penalty: float):
177
  """
178
  Generates responses using the selected model for image input.
179
  Yields raw text and Markdown-formatted text.
 
218
  ).to(device)
219
 
220
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
221
+ generation_kwargs = {
222
+ **inputs,
223
+ "streamer": streamer,
224
+ "max_new_tokens": max_new_tokens,
225
+ "do_sample": True,
226
+ "temperature": temperature,
227
+ "top_p": top_p,
228
+ "top_k": top_k,
229
+ "repetition_penalty": repetition_penalty,
230
+ }
231
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
232
  thread.start()
233
  buffer = ""
 
247
 
248
 
249
  # Create the Gradio Interface
250
+ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
251
  gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
252
  with gr.Row():
253
  with gr.Column(scale=2):
254
  image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
255
  image_upload = gr.Image(type="pil", label="Upload Image", height=290)
256
+
257
+ with gr.Accordion("Advanced options", open=False):
258
+ max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
259
+ temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7)
260
+ top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
261
+ top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
262
+ repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
263
+
264
  image_submit = gr.Button("Submit", variant="primary")
265
  gr.Examples(
266
  examples=image_examples,
 
281
 
282
  image_submit.click(
283
  fn=generate_image,
284
+ inputs=[model_choice, image_query, image_upload, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
285
  outputs=[output, markdown_output]
286
  )
287