IoannisKat1 committed on
Commit 5f15a71 · verified · 1 Parent(s): 038b22b

Create app.py

Files changed (1)
  1. app.py +441 -0
app.py ADDED
@@ -0,0 +1,441 @@
from unsloth import FastLanguageModel
import torch
import gradio as gr
import xml.etree.ElementTree as ET
import re

"""
This module provides utilities for extracting structured data from text blocks.
It supports parsing XML-like structures, Markdown-like formatting, and alternative
text representations for extracting "choice" and "justification" fields.

Functions:
    extract_from_xml_et(text: str) -> dict
        Parses XML-like text and extracts key-value pairs.
    extract_choice(text: str) -> str
        Extracts the choice (e.g., A), B), C), D)) from a text block.
    extract_justification(text: str) -> str
        Extracts the justification text from a text block.
    extract_from_markdown_regex(text: str) -> dict
        Extracts data from Markdown-like structured text, specifically "choice"
        and "justification" fields.
    extract_fields(text: str) -> list
        Processes text blocks to extract structured data using a combination of
        XML parsing, regex-based choice and justification extraction, and Markdown-like parsing.
"""

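# Example input shapes the extractors below aim to normalize (illustrative samples,
# mirroring the docstring examples that follow):
#   <choice>"A"</choice><justification>"..."</justification>
#   **choice**: A **justification**: ...
#   A) Some option text
#   - Justification: Some reason
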
def extract_from_xml_et(text: str) -> dict:
    """
    Parses an XML-like string and extracts key-value pairs from its elements.

    Parameters:
        text (str): A string containing XML-like content (e.g., <tag>value</tag>).
    Returns:
        dict: A dictionary where the keys are lowercase XML tags and the values
            are their corresponding text content.
        None: Returns None if XML parsing fails.
    Example:
        >>> text = '<key>"value"</key>'
        >>> extract_from_xml_et(text)
        {'key': 'value'}
    """
    try:
        wrapped_text = f"<root>{text}</root>"
        root = ET.fromstring(wrapped_text)
        data = {}
        for child in root:
            if child.text:
                value = child.text.strip().strip('"')
                data[child.tag.lower()] = value
        return data
    except ET.ParseError:
        return None


def extract_choice(text: str) -> str:
    """
    Extracts the choice (e.g., A), B), C), D)) from a text block.

    Parameters:
        text (str): Input text to search for the choice.
    Returns:
        str: The extracted choice, or None if not found.
    Example:
        >>> text = "A) This is a sample choice."
        >>> extract_choice(text)
        'A)'
    """
    choice_pattern = r'([A-D]\))'
    match = re.search(choice_pattern, text)
    if match:
        return match.group(1).strip()
    return None


def extract_justification(text: str) -> str:
    """
    Extracts the justification text from a text block.

    Parameters:
        text (str): Input text to search for the justification.
    Returns:
        str: The extracted justification, or None if not found.
    Example:
        >>> text = "- Justification: This is the reason."
        >>> extract_justification(text)
        'This is the reason.'
    """
    justification_pattern = r'(?:- )?Justification:\s*(.+)'
    match = re.search(justification_pattern, text)
    if match:
        return match.group(1).strip()
    return None


def extract_from_markdown_regex(text: str) -> dict:
    """
    Extracts structured data from Markdown-like text blocks.

    Parameters:
        text (str): Input text containing Markdown-like content, with **choice**
            and **justification** fields.
    Returns:
        dict: A dictionary containing "choice" and "justification", or None if no match is found.
    Example:
        >>> text = "**choice**: A **justification**: This is the reason."
        >>> extract_from_markdown_regex(text)
        {'choice': 'A', 'justification': 'This is the reason.'}
    """
    # A lookahead bounds the lazy capture at the justification marker or line end,
    # so the whole choice text is kept rather than a single character.
    choice_pattern = r'\*\*choice\*\*:\s*(.+?)(?=\s*\*\*justification\*\*|\n|$)'
    justification_pattern = r'\*\*justification\*\*:\s*([\s\S]+?)(?=\*\*choice\*\*|$)'
    choice_match = re.search(choice_pattern, text)
    justification_match = re.search(justification_pattern, text)

    if choice_match and justification_match:
        return {
            "choice": choice_match.group(1).strip(),
            "justification": justification_match.group(1).strip()
        }
    return None


def extract_fields(text: str) -> list:
    """
    Processes text blocks to extract structured data.

    This function attempts to parse each block using the following methods:
        1. XML Parsing: Uses extract_from_xml_et to handle XML-like content.
        2. Regex for Choice and Justification: Extracts these fields separately.
        3. Markdown Parsing: Uses extract_from_markdown_regex for Markdown-like structures.
    Parameters:
        text (str): Input text containing one or more blocks of data.
    Returns:
        list: A list of dictionaries, each containing extracted data from a block.
    Workflow:
        1. Splits the input text into blocks using double line breaks (\\n\\n).
        2. For each block:
            - Attempts to parse it using extract_from_xml_et.
            - If unsuccessful, tries extract_choice and extract_justification.
            - Finally, falls back to extract_from_markdown_regex.
        3. Aggregates the results into a list of dictionaries.
    Example:
        >>> text = '''
        ... <key>"value"</key>
        ...
        ... **choice**: A **justification**: This is the reason.
        ...
        ... A) Taking all reasonable measures to safeguard user data,
        ... - Justification: This is the reason.
        ... '''
        >>> extract_fields(text)
        [
            {'key': 'value'},
            {'choice': 'A', 'justification': 'This is the reason.'},
            {'choice': 'A)', 'justification': 'This is the reason.'}
        ]
    """
    entries = []
    blocks = re.split(r'\n\s*\n', text.strip())  # Split text into blocks by double newlines

    for block in blocks:
        print("Processing Block:", block)
        extracted_data = {}

        # Try extracting using XML
        xml_data = extract_from_xml_et(block)
        if xml_data:
            print("Extracted via XML:", xml_data)
            entries.append(xml_data)
            continue

        # Try extracting using separated choice and justification regex
        choice = extract_choice(block)
        justification = extract_justification(block)
        if choice or justification:
            extracted_data["choice"] = choice
            extracted_data["justification"] = justification
            entries.append(extracted_data)
            continue

        # Try extracting using Markdown regex
        markdown_data = extract_from_markdown_regex(block)
        if markdown_data:
            print("Extracted via Markdown Regex:", markdown_data)
            entries.append(markdown_data)

    return entries


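# Quick illustrative check of the extraction cascade (kept commented out, like the
# sample call near the bottom of this file); the sample text is hypothetical:
# sample = '<choice>"A"</choice>\n<justification>"This is the reason."</justification>'
# print(extract_fields(sample))  # expected: [{'choice': 'A', 'justification': 'This is the reason.'}]
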
### Initialize the LLM and tokenizer from the fine-tuned checkpoint located in the local unified_model directory.
model, tokenizer = FastLanguageModel.from_pretrained('./unified_model')


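# Note: the generation helpers below move their tokenized inputs to "cuda", so this
# app assumes a CUDA-capable GPU is available where the checkpoint is loaded.
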
def generate_response_true_false(instruction):
    """
    Generates a response using the fine-tuned model based on the provided instruction.

    This function enables faster inference through `FastLanguageModel` and prepares a
    prompt for the model to determine whether the given statement is "True" or "False".
    Args:
        instruction (str): A string containing the statement and instructions to be evaluated.
    Returns:
        str: "True" or "False" based on the model's response, or "Unable to determine" if the
            response cannot be parsed reliably.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Determine if the following statement is true or false. Respond only with "True" or "False".
### Statement:
{instruction}
### Answer:"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Answer:")[-1].strip()

    # Extract True/False from the response
    if response.lower() == "true":
        return "True"
    elif response.lower() == "false":
        return "False"
    else:
        # Try to identify the answer even if it's not perfectly formatted
        if "true" in response.lower():
            return "True"
        elif "false" in response.lower():
            return "False"
        else:
            return "Unable to determine."

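# Illustrative use (commented out; the statement below is only an example):
# print(generate_response_true_false("Phishing e-mails always come from unknown senders."))
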
def generate_response_open_ended(instruction):
    """
    Generates a response using the fine-tuned model based on the provided instruction.

    This function enables faster inference through `FastLanguageModel` and prepares a
    prompt asking the model to answer the provided open-ended question.
    Args:
        instruction (str): A string containing the question to be answered.
    Returns:
        str: The model's answer to the provided question.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Answer the provided question with the knowledge provided to you
### Question:
{instruction}
### Answer:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        # early_stopping and length_penalty only influence beam-search decoding;
        # with greedy decoding they are effectively no-ops.
        outputs = model.generate(**inputs, early_stopping=False, min_length=50, length_penalty=2, max_length=200)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[-1].strip()
    return response

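# Illustrative use (commented out; the question below is only an example):
# print(generate_response_open_ended("What obligations do banks have towards phishing victims?"))
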
def generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D):
    """
    Generates a response using the fine-tuned language model for multiple-choice questions.

    Args:
        question (str): The question to be answered.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.
    Returns:
        dict: A dictionary with the selected choice and its justification.
    Example:
        {
            "choice": "A",
            "justification": "Explanation for why Option A is correct."
        }
    If the model fails to provide a valid response, defaults to:
        {
            "choice": "None",
            "justification": "Could not parse JSON"
        }
    """
    instruction = f'''{question}
Choices:
A) {choice_A},
B) {choice_B},
C) {choice_C},
D) {choice_D}
'''

    # Enable native faster inference for the model
    FastLanguageModel.for_inference(model)

    # Define the prompt with a detailed instruction for the model
    prompt = f"""### Instruction:
In the following question, you are provided with 4 choices. Select the best choice based on the knowledge provided and provide a justification for that choice.
**You must return only your response with the following keys:**
- "choice": The best choice letter
- "justification": The justification for your choice
**Example Response:**
**choice**: A
**justification**: Explanation for why Option A is correct
### Question:
{instruction}
### Answer:

"""

    # Tokenize the prompt and move it to GPU for inference
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Generate a response from the model
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            early_stopping=True,
            min_length=50,
            length_penalty=2,
            do_sample=True,
            max_new_tokens=300,
            top_p=0.95,
            top_k=50,
            temperature=0.65,
            num_return_sequences=1
        )

    # Decode the response into text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[-1]
    print("RESPONSE", response)
    data = extract_fields(response)
    if len(data) == 0:
        # Fall back to the documented default when nothing could be extracted
        response = {"choice": "None", "justification": "Could not parse JSON"}
    else:
        # Use the last extracted block, which corresponds to the model's final answer
        response = {
            "choice": data[-1].get('choice', 'None'),
            "justification": data[-1].get('justification', 'Could not parse JSON')
        }
    return response

def true_false_greet(question):
    if question == "":
        # Return a default response if no input is given
        return "No question was given to answer"
    else:
        # Delegate to the true/false generation helper defined above
        response = generate_response_true_false(question)
        return f"{response}!"

def open_ended_greet(question):
    """
    Processes the user's question and returns a response.

    Args:
        question (str): The input text provided by the user.
    Returns:
        str: A processed response. If no input is given, a default message is returned.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    else:
        # Delegate to the open-ended generation helper defined above
        response = generate_response_open_ended(question)

        # Return the formatted response
        return f"{response}!"

def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
    """
    Processes the user's question and multiple-choice options to generate a response.

    Args:
        question (str): The input question provided by the user.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.
    Returns:
        str: A response based on the input.
            If no question is provided, returns a default message.
            If no choices are provided, returns a default message.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    if choice_A == "" and choice_B == "" and choice_C == "" and choice_D == "":
        # Return a default response if no choices are provided
        return "No choice was given"
    else:
        # Delegate to the multiple-choice generation helper defined above
        response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
        actual_response = "Selected Choice: " + response['choice'] + "\nJustification: " + response['justification']
        # Return the formatted response
        return f"{actual_response}"

#### Function which enables the visibility of the true/false questions interface
def show_true_false_interface():
    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

#### Function which enables the visibility of the open-ended questions interface
def show_open_ended_interface():
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

#### Function which enables the visibility of the multiple-choice questions interface
def show_multiple_choice_interface():
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

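# Each helper returns three gr.update objects in the order
# (true_false_interface, open_ended_interface, mc_interface), matching the
# outputs= lists wired up in the click handlers below.
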
# print(generate_response_multiple_choice("Which of the following best describes a bank’s legal duty in cases of phishing, according to Greek law?",
#                                         "Taking all reasonable measures to safeguard user data and transactions",
#                                         "Ensuring absolute prevention of all cyberattacks",
#                                         "Holding customers solely responsible for phishing losses",
#                                         "Avoiding liability by implementing disclaimers"
#                                         ))


with gr.Blocks() as demo:

    ### A row with the navigation buttons for each question type
    with gr.Row():
        btn_t_f = gr.Button('True/False questions')
        btn_open_ended = gr.Button('Open-Ended questions')
        btn_m_c = gr.Button('Multiple-Choice questions')

    ### Interface for the true/false questions
    with gr.Column(visible=True) as true_false_interface:
        gr.Markdown("## True-False Template")
        question_tf = gr.Textbox(label="Enter your question")
        tf_output = gr.Textbox(label="Output", interactive=False)
        submit_tf = gr.Button("Submit")
        submit_tf.click(true_false_greet, inputs=question_tf, outputs=tf_output)

    ### Interface for the open-ended questions
    with gr.Column(visible=False) as open_ended_interface:
        gr.Markdown("## Open Ended Template")
        question_open = gr.Textbox(label="Enter your question")
        open_output = gr.Textbox(label="Output", interactive=False)
        submit_open = gr.Button("Submit")
        submit_open.click(open_ended_greet, inputs=question_open, outputs=open_output)

    ### Interface for the multiple-choice questions
    with gr.Column(visible=False) as mc_interface:
        gr.Markdown("## Multiple-Choice Template")
        question_mc = gr.Textbox(label="Enter your question")
        choice_A = gr.Textbox(label="Choice A")
        choice_B = gr.Textbox(label="Choice B")
        choice_C = gr.Textbox(label="Choice C")
        choice_D = gr.Textbox(label="Choice D")
        mc_output = gr.Textbox(label="Output", interactive=False)
        submit_mc = gr.Button("Submit")
        submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)

    ### Clicking a navigation button toggles which interface is visible
    btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])

demo.launch()
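
# demo.launch() serves the app locally (or on the hosting Space); passing share=True
# would additionally create a temporary public Gradio link.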