IoannisKat1 commited on
Commit
324f1b8
·
verified ·
1 Parent(s): 757619d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -20
app.py CHANGED
@@ -1,8 +1,211 @@
1
  from unsloth import FastLanguageModel
2
  import torch
3
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
5
 
 
6
  def generate_response_true_false(instruction):
7
  """
8
  Generates a response using your fine-tuned model based on the provided instruction.
@@ -72,11 +275,22 @@ Answer the provided question with the knowledge provided to you
72
 
73
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
74
  with torch.no_grad():
75
- outputs = model.generate(**inputs,early_stopping=False,min_length=50,length_penalty=2,max_length=300)
76
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
77
  return response
78
 
79
- def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,choice_D):
 
 
 
 
 
 
 
 
 
80
  """
81
  Generates a response using a fine-tuned language model for multiple-choice questions.
82
 
@@ -114,13 +328,8 @@ def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,cho
114
  ### Question:
115
  {instruction}
116
 
117
- ### Choices:
118
- A) {choice_A}
119
- B) {choice_B}
120
- C) {choice_C}
121
- D) {choice_D}
122
-
123
  ### Answer:
 
124
  """
125
 
126
  # Tokenize the prompt and move it to GPU for inference
@@ -137,12 +346,20 @@ def generate_response_multiple_choice(instruction,choice_A,choice_B,choice_C,cho
137
  max_new_tokens=300,
138
  top_p=0.95,
139
  top_k=50,
140
- temperature=0.7,
141
  num_return_sequences=1
142
  )
143
 
144
  # Decode the response into text
145
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
146
  return response
147
 
148
  def true_false_greet(question):
@@ -170,10 +387,7 @@ def open_ended_greet(question):
170
  else:
171
  # Call a placeholder function (must be implemented separately) to generate a response
172
  response = generate_response_open_ended(question) # Note: generate_response is not defined in this snippet
173
-
174
- # Extract the answer from the generated response by splitting on "### Answer:"
175
- # response = response.split('### Answer:')[1]
176
-
177
  # Return the formatted response
178
  return f"{response}!"
179
 
@@ -189,7 +403,7 @@ def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
189
  choice_D (str): Option D for the question.
190
 
191
  Returns:
192
- str: A response based on the input.
193
  If no question is provided, returns a default message.
194
  If no choices are provided, returns a default message.
195
  """
@@ -202,29 +416,39 @@ def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
202
  else:
203
  # Call a placeholder function (must be implemented separately) to generate a response
204
  response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
205
-
206
- # Extract the answer from the generated response by splitting on "### Answer:"
207
- # response = response.split('### Answer:')[1]
208
-
209
  # Return the formatted response
210
- return f"{response}"
211
 
 
212
  def show_true_false_interface():
213
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
214
 
 
215
  def show_open_ended_interface():
216
  return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
217
 
 
218
  def show_multiple_choice_interface():
219
  return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
220
 
 
 
 
 
 
 
 
 
221
  with gr.Blocks() as demo:
222
 
 
223
  with gr.Row():
224
  btn_t_f = gr.Button('True/False questions')
225
  btn_open_ended = gr.Button('Open-Ended questions')
226
  btn_m_c = gr.Button('Multiple-Choice questions')
227
 
 
228
  with gr.Column(visible=True) as true_false_interface:
229
  gr.Markdown("## True-False Template")
230
  question_simple = gr.Textbox(label="Enter your question")
@@ -232,6 +456,7 @@ with gr.Blocks() as demo:
232
  submit_simple = gr.Button("Submit")
233
  submit_simple.click(true_false_greet, inputs=question_simple, outputs=simple_output)
234
 
 
235
  with gr.Column(visible=False) as open_ended_interface:
236
  gr.Markdown("## Open Ended Template")
237
  question_simple = gr.Textbox(label="Enter your question")
@@ -239,6 +464,7 @@ with gr.Blocks() as demo:
239
  submit_simple = gr.Button("Submit")
240
  submit_simple.click(open_ended_greet, inputs=question_simple, outputs=simple_output)
241
 
 
242
  with gr.Column(visible=False) as mc_interface:
243
  gr.Markdown("## Multiple-Choice Template")
244
  question_mc = gr.Textbox(label="Enter your question")
@@ -250,8 +476,9 @@ with gr.Blocks() as demo:
250
  submit_mc = gr.Button("Submit")
251
  submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)
252
 
 
253
  btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
254
  btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
255
  btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
256
 
257
- demo.launch(debug=True)
 
1
  from unsloth import FastLanguageModel
2
  import torch
3
  import gradio as gr
4
+ import xml.etree.ElementTree as ET
5
+ import re
6
+
7
+ """
8
+ This module provides utilities for extracting structured data from text blocks.
9
+ It supports parsing XML-like structures, Markdown-like formatting, and alternative
10
+ text representations for extracting "choice" and "justification" fields.
11
+
12
+ Functions:
13
+ extract_from_xml_et(text: str) -> dict
14
+ Parses XML-like text and extracts key-value pairs.
15
+
16
+ extract_choice(text: str) -> str
17
+ Extracts the choice (e.g., A), B), C), D)) from a text block.
18
+
19
+ extract_justification(text: str) -> str
20
+ Extracts the justification text from a text block.
21
+
22
+ extract_from_markdown_regex(text: str) -> dict
23
+ Extracts data from Markdown-like structured text, specifically "choice"
24
+ and "justification" fields.
25
+
26
+ extract_fields(text: str) -> list
27
+ Processes text blocks to extract structured data using a combination of
28
+ XML parsing, regex-based choice and justification extraction, and Markdown-like parsing.
29
+ """
30
+
31
def extract_from_xml_et(text: str) -> dict:
    """
    Parse an XML-like string and collect tag/value pairs from its top-level elements.

    Parameters:
        text (str): XML-like content, e.g. '<key>"value"</key>'.

    Returns:
        dict: Lowercased tag names mapped to their stripped, de-quoted text content.
        None: When the wrapped text is not well-formed XML.

    Example:
        >>> extract_from_xml_et('<key>"value"</key>')
        {'key': 'value'}
    """
    try:
        # Wrap in a synthetic root so multiple sibling tags still form one document.
        root = ET.fromstring(f"<root>{text}</root>")
    except ET.ParseError:
        return None
    return {
        element.tag.lower(): element.text.strip().strip('"')
        for element in root
        if element.text
    }
59
+
60
+
61
def extract_choice(text: str) -> str:
    """
    Find the first multiple-choice marker (A) through D)) in a block of text.

    Parameters:
        text (str): Text that may contain a choice marker.

    Returns:
        str: The marker such as 'A)', or None when no marker is present.

    Example:
        >>> extract_choice("A) This is a sample choice.")
        'A)'
    """
    found = re.search(r'([A-D]\))', text)
    return found.group(1).strip() if found else None
81
+
82
+
83
def extract_justification(text: str) -> str:
    """
    Pull the justification sentence out of a block of text.

    Matches 'Justification: ...' with an optional leading '- ' bullet; the
    capture runs to the end of that line.

    Parameters:
        text (str): Text that may contain a justification line.

    Returns:
        str: The justification content, or None when absent.

    Example:
        >>> extract_justification("- Justification: This is the reason.")
        'This is the reason.'
    """
    found = re.search(r'(?:- )?Justification:\s*(.+)', text)
    if not found:
        return None
    return found.group(1).strip()
103
+
104
+
105
def extract_from_markdown_regex(text: str) -> dict:
    """
    Extract 'choice' and 'justification' fields from Markdown-like text.

    Parameters:
        text (str): Text containing '**choice**: ...' and '**justification**: ...' fields.

    Returns:
        dict: {'choice': ..., 'justification': ...} when both fields are found.
        None: When either field is missing.

    Example:
        >>> extract_from_markdown_regex("**choice**: A **justification**: This is the reason.")
        {'choice': 'A', 'justification': 'This is the reason.'}
    """
    # BUG FIX: the previous pattern ended with a bare lazy group r'(.+?)', which
    # always matched exactly one character and silently truncated multi-character
    # choices such as 'A) Taking all reasonable measures'. The lookahead below
    # bounds the capture at the justification marker, a newline, or end of text.
    choice_pattern = r'\*\*choice\*\*:\s*(.+?)(?=\s*\*\*justification\*\*|\n|$)'
    justification_pattern = r'\*\*justification\*\*:\s*([\s\S]+?)(?=\*\*choice\*\*|$)'
    choice_match = re.search(choice_pattern, text)
    justification_match = re.search(justification_pattern, text)

    if choice_match and justification_match:
        return {
            "choice": choice_match.group(1).strip(),
            "justification": justification_match.group(1).strip(),
        }
    return None
132
+
133
+
134
def extract_fields(text: str) -> list:
    """
    Extract structured entries from text made of blank-line-separated blocks.

    Each block is tried against three strategies, in order, and the first one
    that yields data wins:
      1. XML-like parsing via extract_from_xml_et.
      2. Regex extraction of the choice marker and justification line.
      3. Markdown-style '**choice**'/'**justification**' parsing.

    Parameters:
        text (str): One or more blocks separated by double newlines.

    Returns:
        list: One dict of extracted fields per block that matched a strategy.
    """
    results = []

    # Blocks are delimited by a blank (possibly whitespace-only) line.
    for chunk in re.split(r'\n\s*\n', text.strip()):
        print("Processing Block:", chunk)

        # Strategy 1: XML-like content. An empty dict (no child tags) is
        # falsy and intentionally falls through to the next strategy.
        parsed = extract_from_xml_et(chunk)
        if parsed:
            print("Extracted via XML:", parsed)
            results.append(parsed)
            continue

        # Strategy 2: independent choice-marker / justification-line regexes.
        marker = extract_choice(chunk)
        reason = extract_justification(chunk)
        if marker or reason:
            results.append({"choice": marker, "justification": reason})
            continue

        # Strategy 3: Markdown-style bold field labels.
        md = extract_from_markdown_regex(chunk)
        if md:
            print("Extracted via Markdown Regex:", md)
            results.append(md)

    return results
203
+
204
+
205
+ ### The code initializes the LLM model and tokenizer from a fine-tuned checkpoint located in a directory called unified_model.
206
  model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
207
 
208
+
209
  def generate_response_true_false(instruction):
210
  """
211
  Generates a response using your fine-tuned model based on the provided instruction.
 
275
 
276
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
277
  with torch.no_grad():
278
+ outputs = model.generate(**inputs,early_stopping=False,min_length=50,length_penalty=2,max_length=200)
279
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
280
+ # Extract the answer from the generated response by splitting on "### Answer:"
281
+ response = response.split('### Answer:')[1]
282
  return response
283
 
284
+ def generate_response_multiple_choice(question,choice_A,choice_B,choice_C,choice_D):
285
+
286
+ instruction = f'''{question}
287
+ Choices:
288
+ A) {choice_A},
289
+ B) {choice_B},
290
+ C) {choice_C},
291
+ D) {choice_D}
292
+ '''
293
+
294
  """
295
  Generates a response using a fine-tuned language model for multiple-choice questions.
296
 
 
328
  ### Question:
329
  {instruction}
330
 
 
 
 
 
 
 
331
  ### Answer:
332
+
333
  """
334
 
335
  # Tokenize the prompt and move it to GPU for inference
 
346
  max_new_tokens=300,
347
  top_p=0.95,
348
  top_k=50,
349
+ temperature=0.65,
350
  num_return_sequences=1
351
  )
352
 
353
  # Decode the response into text
354
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
355
+ # Extract the answer from the generated response by splitting on "### Answer:"
356
+ response = response.split('### Answer:')[1]
357
+ print("RESPONSE",response)
358
+ data = extract_fields(response)
359
+ if len(data) == 0:
360
+ response = {"choice": data[0]['choice'], "justification": data[0]['justification']}
361
+ else:
362
+ response = {"choice": data[-1]['choice'], "justification": data[-1]['justification']}
363
  return response
364
 
365
  def true_false_greet(question):
 
387
  else:
388
  # Call a placeholder function (must be implemented separately) to generate a response
389
  response = generate_response_open_ended(question) # Note: generate_response is not defined in this snippet
390
+
 
 
 
391
  # Return the formatted response
392
  return f"{response}!"
393
 
 
403
  choice_D (str): Option D for the question.
404
 
405
  Returns:
406
+ str: A response based on the input.
407
  If no question is provided, returns a default message.
408
  If no choices are provided, returns a default message.
409
  """
 
416
  else:
417
  # Call a placeholder function (must be implemented separately) to generate a response
418
  response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
419
+ actual_response = "Selected Choice: " + response['choice'] + "\nJustification: " + response['justification']
 
 
 
420
  # Return the formatted response
421
+ return f"{actual_response}"
422
 
423
+ #### Function which enables the visibility of true/false questions interface
424
def show_true_false_interface():
    """Reveal the true/false panel; hide the open-ended and multiple-choice panels."""
    panel_visibility = [True, False, False]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
426
 
427
+ #### Function which enables the visibility of open-ended questions interface
428
def show_open_ended_interface():
    """Reveal the open-ended panel; hide the true/false and multiple-choice panels."""
    panel_visibility = [False, True, False]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
430
 
431
+ #### Function which enables the visibility of multiple-choice questions interface
432
def show_multiple_choice_interface():
    """Reveal the multiple-choice panel; hide the true/false and open-ended panels."""
    panel_visibility = [False, False, True]
    return tuple(gr.update(visible=flag) for flag in panel_visibility)
434
 
435
+ # print(generate_response_multiple_choice("Which of the following best describes a bank’s legal duty in cases of phishing, according to Greek law?",
436
+ # "Taking all reasonable measures to safeguard user data and transactions",
437
+ # "Ensuring absolute prevention of all cyberattacks",
438
+ # "Holding customers solely responsible for phishing losses",
439
+ # "Avoiding liability by implementing disclaimers"
440
+ # ))
441
+
442
+
443
  with gr.Blocks() as demo:
444
 
445
+ ### We define a row in which we create the navigation buttons for each question type
446
  with gr.Row():
447
  btn_t_f = gr.Button('True/False questions')
448
  btn_open_ended = gr.Button('Open-Ended questions')
449
  btn_m_c = gr.Button('Multiple-Choice questions')
450
 
451
+ ### We define the interface for the true/false questions
452
  with gr.Column(visible=True) as true_false_interface:
453
  gr.Markdown("## True-False Template")
454
  question_simple = gr.Textbox(label="Enter your question")
 
456
  submit_simple = gr.Button("Submit")
457
  submit_simple.click(true_false_greet, inputs=question_simple, outputs=simple_output)
458
 
459
+ ### We define the interface for the open-ended questions
460
  with gr.Column(visible=False) as open_ended_interface:
461
  gr.Markdown("## Open Ended Template")
462
  question_simple = gr.Textbox(label="Enter your question")
 
464
  submit_simple = gr.Button("Submit")
465
  submit_simple.click(open_ended_greet, inputs=question_simple, outputs=simple_output)
466
 
467
+ ### We define the interface for the multiple-choice questions
468
  with gr.Column(visible=False) as mc_interface:
469
  gr.Markdown("## Multiple-Choice Template")
470
  question_mc = gr.Textbox(label="Enter your question")
 
476
  submit_mc = gr.Button("Submit")
477
  submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)
478
 
479
+ ### If a navigation button is clicked, a visibility function is executed
480
  btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
481
  btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
482
  btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
483
 
484
+ demo.launch()