IoannisKat1 commited on
Commit
324f1b8
·
verified ·
1 Parent(s): 757619d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -20
app.py CHANGED
@@ -1,8 +1,211 @@
1
  from unsloth import FastLanguageModel
2
  import torch
3
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
5
 
 
6
  def generate_response_true_false(instruction):
7
  """
8
  Generates a response using your fine-tuned model based on the provided instruction.
@@ -72,11 +275,22 @@ Answer the provided question with the knowledge provided to you
72
 
73
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
74
  with torch.no_grad():
75
- outputs = model.generate(**inputs,early_stopping=False,min_length=50,length_penalty=2,max_length=300)
76
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
77
  return response
78
 
79
- def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,choice_D):
 
 
 
 
 
 
 
 
 
80
  """
81
  Generates a response using a fine-tuned language model for multiple-choice questions.
82
 
@@ -114,13 +328,8 @@ def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,cho
114
  ### Question:
115
  {instruction}
116
 
117
- ### Choices:
118
- A) {choice_A}
119
- B) {choice_B}
120
- C) {choice_C}
121
- D) {choice_D}
122
-
123
  ### Answer:
 
124
  """
125
 
126
  # Tokenize the prompt and move it to GPU for inference
@@ -137,12 +346,20 @@ def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,cho
137
  max_new_tokens=300,
138
  top_p=0.95,
139
  top_k=50,
140
- temperature=0.7,
141
  num_return_sequences=1
142
  )
143
 
144
  # Decode the response into text
145
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
146
  return response
147
 
148
  def true_false_greet(question):
@@ -170,10 +387,7 @@ def open_ended_greet(question):
170
  else:
171
  # Call a placeholder function (must be implemented separately) to generate a response
172
  response = generate_response_open_ended(question) # Note: generate_response is not defined in this snippet
173
-
174
- # Extract the answer from the generated response by splitting on "### Answer:"
175
- # response = response.split('### Answer:')[1]
176
-
177
  # Return the formatted response
178
  return f"{response}!"
179
 
@@ -189,7 +403,7 @@ def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
189
  choice_D (str): Option D for the question.
190
 
191
  Returns:
192
- str: A response based on the input.
193
  If no question is provided, returns a default message.
194
  If no choices are provided, returns a default message.
195
  """
@@ -202,29 +416,39 @@ def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
202
  else:
203
  # Call a placeholder function (must be implemented separately) to generate a response
204
  response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
205
-
206
- # Extract the answer from the generated response by splitting on "### Answer:"
207
- # response = response.split('### Answer:')[1]
208
-
209
  # Return the formatted response
210
- return f"{response}"
211
 
 
212
  def show_true_false_interface():
213
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
214
 
 
215
  def show_open_ended_interface():
216
  return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
217
 
 
218
  def show_multiple_choice_interface():
219
  return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
220
 
 
 
 
 
 
 
 
 
221
  with gr.Blocks() as demo:
222
 
 
223
  with gr.Row():
224
  btn_t_f = gr.Button('True/False questions')
225
  btn_open_ended = gr.Button('Open-Ended questions')
226
  btn_m_c = gr.Button('Multiple-Choice questions')
227
 
 
228
  with gr.Column(visible=True) as true_false_interface:
229
  gr.Markdown("## True-False Template")
230
  question_simple = gr.Textbox(label="Enter your question")
@@ -232,6 +456,7 @@ with gr.Blocks() as demo:
232
  submit_simple = gr.Button("Submit")
233
  submit_simple.click(true_false_greet, inputs=question_simple, outputs=simple_output)
234
 
 
235
  with gr.Column(visible=False) as open_ended_interface:
236
  gr.Markdown("## Open Ended Template")
237
  question_simple = gr.Textbox(label="Enter your question")
@@ -239,6 +464,7 @@ with gr.Blocks() as demo:
239
  submit_simple = gr.Button("Submit")
240
  submit_simple.click(open_ended_greet, inputs=question_simple, outputs=simple_output)
241
 
 
242
  with gr.Column(visible=False) as mc_interface:
243
  gr.Markdown("## Multiple-Choice Template")
244
  question_mc = gr.Textbox(label="Enter your question")
@@ -250,8 +476,9 @@ with gr.Blocks() as demo:
250
  submit_mc = gr.Button("Submit")
251
  submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)
252
 
 
253
  btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
254
  btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
255
  btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
256
 
257
- demo.launch(debug=True)
 
1
  from unsloth import FastLanguageModel
2
  import torch
3
  import gradio as gr
4
+ import xml.etree.ElementTree as ET
5
+ import re
6
+
7
+ """
8
+ This module provides utilities for extracting structured data from text blocks.
9
+ It supports parsing XML-like structures, Markdown-like formatting, and alternative
10
+ text representations for extracting "choice" and "justification" fields.
11
+
12
+ Functions:
13
+ extract_from_xml_et(text: str) -> dict
14
+ Parses XML-like text and extracts key-value pairs.
15
+
16
+ extract_choice(text: str) -> str
17
+ Extracts the choice (e.g., A), B), C), D)) from a text block.
18
+
19
+ extract_justification(text: str) -> str
20
+ Extracts the justification text from a text block.
21
+
22
+ extract_from_markdown_regex(text: str) -> dict
23
+ Extracts data from Markdown-like structured text, specifically "choice"
24
+ and "justification" fields.
25
+
26
+ extract_fields(text: str) -> list
27
+ Processes text blocks to extract structured data using a combination of
28
+ XML parsing, regex-based choice and justification extraction, and Markdown-like parsing.
29
+ """
30
+
31
def extract_from_xml_et(text: str) -> dict:
    """
    Parse an XML-like string and collect tag/value pairs from its top-level elements.

    Parameters:
        text (str): XML-like content, e.g. '<key>"value"</key>'.

    Returns:
        dict: Lowercased tag names mapped to their stripped, de-quoted text content.
        None: When the wrapped text is not well-formed XML.

    Example:
        >>> extract_from_xml_et('<key>"value"</key>')
        {'key': 'value'}
    """
    try:
        # Wrap in a synthetic root so multiple sibling tags still form one document.
        root = ET.fromstring(f"<root>{text}</root>")
    except ET.ParseError:
        return None
    return {
        element.tag.lower(): element.text.strip().strip('"')
        for element in root
        if element.text
    }
59
+
60
+
61
def extract_choice(text: str) -> str:
    """
    Find the first multiple-choice marker (A) through D)) in a block of text.

    Parameters:
        text (str): Text that may contain a choice marker.

    Returns:
        str: The marker such as 'A)', or None when no marker is present.

    Example:
        >>> extract_choice("A) This is a sample choice.")
        'A)'
    """
    found = re.search(r'([A-D]\))', text)
    return found.group(1).strip() if found else None
81
+
82
+
83
def extract_justification(text: str) -> str:
    """
    Pull the justification sentence out of a block of text.

    Matches 'Justification: ...' with an optional leading '- ' bullet; the
    capture runs to the end of that line.

    Parameters:
        text (str): Text that may contain a justification line.

    Returns:
        str: The justification content, or None when absent.

    Example:
        >>> extract_justification("- Justification: This is the reason.")
        'This is the reason.'
    """
    found = re.search(r'(?:- )?Justification:\s*(.+)', text)
    if not found:
        return None
    return found.group(1).strip()
103
+
104
+
105
def extract_from_markdown_regex(text: str) -> dict:
    """
    Extract 'choice' and 'justification' fields from Markdown-like text.

    Parameters:
        text (str): Text containing '**choice**: ...' and '**justification**: ...' fields.

    Returns:
        dict: {'choice': ..., 'justification': ...} when both fields are found.
        None: When either field is missing.

    Example:
        >>> extract_from_markdown_regex("**choice**: A **justification**: This is the reason.")
        {'choice': 'A', 'justification': 'This is the reason.'}
    """
    # BUG FIX: the previous pattern ended with a bare lazy group r'(.+?)', which
    # always matched exactly one character and silently truncated multi-character
    # choices such as 'A) Taking all reasonable measures'. The lookahead below
    # bounds the capture at the justification marker, a newline, or end of text.
    choice_pattern = r'\*\*choice\*\*:\s*(.+?)(?=\s*\*\*justification\*\*|\n|$)'
    justification_pattern = r'\*\*justification\*\*:\s*([\s\S]+?)(?=\*\*choice\*\*|$)'
    choice_match = re.search(choice_pattern, text)
    justification_match = re.search(justification_pattern, text)

    if choice_match and justification_match:
        return {
            "choice": choice_match.group(1).strip(),
            "justification": justification_match.group(1).strip(),
        }
    return None
132
+
133
+
134
def extract_fields(text: str) -> list:
    """
    Extract structured entries from text made of blank-line-separated blocks.

    Each block is tried against three strategies, in order, and the first one
    that yields data wins:
      1. XML-like parsing via extract_from_xml_et.
      2. Regex extraction of the choice marker and justification line.
      3. Markdown-style '**choice**'/'**justification**' parsing.

    Parameters:
        text (str): One or more blocks separated by double newlines.

    Returns:
        list: One dict of extracted fields per block that matched a strategy.
    """
    results = []

    # Blocks are delimited by a blank (possibly whitespace-only) line.
    for chunk in re.split(r'\n\s*\n', text.strip()):
        print("Processing Block:", chunk)

        # Strategy 1: XML-like content. An empty dict (no child tags) is
        # falsy and intentionally falls through to the next strategy.
        parsed = extract_from_xml_et(chunk)
        if parsed:
            print("Extracted via XML:", parsed)
            results.append(parsed)
            continue

        # Strategy 2: independent choice-marker / justification-line regexes.
        marker = extract_choice(chunk)
        reason = extract_justification(chunk)
        if marker or reason:
            results.append({"choice": marker, "justification": reason})
            continue

        # Strategy 3: Markdown-style bold field labels.
        md = extract_from_markdown_regex(chunk)
        if md:
            print("Extracted via Markdown Regex:", md)
            results.append(md)

    return results
203
+
204
+
205
+ ### The code initializes the LLM model and tokenizer from a fine-tuned checkpoint located in a directory called unified_model.
206
  model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
207
 
208
+
209
  def generate_response_true_false(instruction):
210
  """
211
  Generates a response using your fine-tuned model based on the provided instruction.
 
275
 
276
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
277
  with torch.no_grad():
278
+ outputs = model.generate(**inputs,early_stopping=False,min_length=50,length_penalty=2,max_length=200)
279
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
280
+ # Extract the answer from the generated response by splitting on "### Answer:"
281
+ response = response.split('### Answer:')[1]
282
  return response
283
 
284
+ def generate_response_multiple_choice(question,choice_A,choice_B,choice_C,choice_D):
285
+
286
+ instruction = f'''{question}
287
+ Choices:
288
+ A) {choice_A},
289
+ B) {choice_B},
290
+ C) {choice_C},
291
+ D) {choice_D}
292
+ '''
293
+
294
  """
295
  Generates a response using a fine-tuned language model for multiple-choice questions.
296
 
 
328
  ### Question:
329
  {instruction}
330
 
 
 
 
 
 
 
331
  ### Answer:
332
+
333
  """
334
 
335
  # Tokenize the prompt and move it to GPU for inference
 
346
  max_new_tokens=300,
347
  top_p=0.95,
348
  top_k=50,
349
+ temperature=0.65,
350
  num_return_sequences=1
351
  )
352
 
353
  # Decode the response into text
354
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
355
+ # Extract the answer from the generated response by splitting on "### Answer:"
356
+ response = response.split('### Answer:')[1]
357
+ print("RESPONSE",response)
358
+ data = extract_fields(response)
359
+ if len(data) == 0:
360
+ response = {"choice": data[0]['choice'], "justification": data[0]['justification']}
361
+ else:
362
+ response = {"choice": data[-1]['choice'], "justification": data[-1]['justification']}
363
  return response
364
 
365
  def true_false_greet(question):
 
387
  else:
388
  # Call a placeholder function (must be implemented separately) to generate a response
389
  response = generate_response_open_ended(question) # Note: generate_response is not defined in this snippet
390
+
 
 
 
391
  # Return the formatted response
392
  return f"{response}!"
393
 
 
403
  choice_D (str): Option D for the question.
404
 
405
  Returns:
406
+ str: A response based on the input.
407
  If no question is provided, returns a default message.
408
  If no choices are provided, returns a default message.
409
  """
 
416
  else:
417
  # Call a placeholder function (must be implemented separately) to generate a response
418
  response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
419
+ actual_response = "Selected Choice: " + response['choice'] + "\nJustification: " + response['justification']
 
 
 
420
  # Return the formatted response
421
+ return f"{actual_response}"
422
 
423
+ #### Function which enables the visibility of true/false questions interface
424
def show_true_false_interface():
    """Reveal the true/false panel; hide the open-ended and multiple-choice panels."""
    panel_visibility = [True, False, False]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
426
 
427
+ #### Function which enables the visibility of open-ended questions interface
428
def show_open_ended_interface():
    """Reveal the open-ended panel; hide the true/false and multiple-choice panels."""
    panel_visibility = [False, True, False]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
430
 
431
+ #### Function which enables the visibility of multiple-choice questions interface
432
def show_multiple_choice_interface():
    """Reveal the multiple-choice panel; hide the true/false and open-ended panels."""
    panel_visibility = [False, False, True]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
434
 
435
+ # print(generate_response_multiple_choice("Which of the following best describes a bank’s legal duty in cases of phishing, according to Greek law?",
436
+ # "Taking all reasonable measures to safeguard user data and transactions",
437
+ # "Ensuring absolute prevention of all cyberattacks",
438
+ # "Holding customers solely responsible for phishing losses",
439
+ # "Avoiding liability by implementing disclaimers"
440
+ # ))
441
+
442
+
443
  with gr.Blocks() as demo:
444
 
445
+ ### We define a row in which we create the navigation buttons for each question type
446
  with gr.Row():
447
  btn_t_f = gr.Button('True/False questions')
448
  btn_open_ended = gr.Button('Open-Ended questions')
449
  btn_m_c = gr.Button('Multiple-Choice questions')
450
 
451
+ ### We define the interface for the true/false questions
452
  with gr.Column(visible=True) as true_false_interface:
453
  gr.Markdown("## True-False Template")
454
  question_simple = gr.Textbox(label="Enter your question")
 
456
  submit_simple = gr.Button("Submit")
457
  submit_simple.click(true_false_greet, inputs=question_simple, outputs=simple_output)
458
 
459
+ ### We define the interface for the open-ended questions
460
  with gr.Column(visible=False) as open_ended_interface:
461
  gr.Markdown("## Open Ended Template")
462
  question_simple = gr.Textbox(label="Enter your question")
 
464
  submit_simple = gr.Button("Submit")
465
  submit_simple.click(open_ended_greet, inputs=question_simple, outputs=simple_output)
466
 
467
+ ### We define the interface for the multiple-choice questions
468
  with gr.Column(visible=False) as mc_interface:
469
  gr.Markdown("## Multiple-Choice Template")
470
  question_mc = gr.Textbox(label="Enter your question")
 
476
  submit_mc = gr.Button("Submit")
477
  submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)
478
 
479
+ ### If a navigation button is clicked, a visibility function is executed
480
  btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
481
  btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
482
  btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
483
 
484
+ demo.launch()