IoannisKat1 committed on
Commit 5f15a71 · verified · 1 Parent(s): 038b22b

Create app.py

Files changed (1)
  1. app.py +441 -0
app.py ADDED
@@ -0,0 +1,441 @@
from unsloth import FastLanguageModel
import torch
import gradio as gr
import xml.etree.ElementTree as ET
import re

"""
This module provides utilities for extracting structured data from text blocks.
It supports parsing XML-like structures, Markdown-like formatting, and alternative
text representations for extracting "choice" and "justification" fields.

Functions:
    extract_from_xml_et(text: str) -> dict
        Parses XML-like text and extracts key-value pairs.
    extract_choice(text: str) -> str
        Extracts the choice (e.g., A), B), C), D)) from a text block.
    extract_justification(text: str) -> str
        Extracts the justification text from a text block.
    extract_from_markdown_regex(text: str) -> dict
        Extracts data from Markdown-like structured text, specifically "choice"
        and "justification" fields.
    extract_fields(text: str) -> list
        Processes text blocks to extract structured data using a combination of
        XML parsing, regex-based choice and justification extraction, and Markdown-like parsing.
"""

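# Example input shapes the extractors below aim to normalize (illustrative samples,
# mirroring the docstring examples that follow):
#   <choice>"A"</choice><justification>"..."</justification>
#   **choice**: A **justification**: ...
#   A) Some option text
#   - Justification: Some reason
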
def extract_from_xml_et(text: str) -> dict:
    """
    Parses an XML-like string and extracts key-value pairs from its elements.

    Parameters:
        text (str): A string containing XML-like content (e.g., <tag>value</tag>).
    Returns:
        dict: A dictionary where the keys are lowercase XML tags and the values
            are their corresponding text content.
        None: Returns None if XML parsing fails.
    Example:
        >>> text = '<key>"value"</key>'
        >>> extract_from_xml_et(text)
        {'key': 'value'}
    """
    try:
        wrapped_text = f"<root>{text}</root>"
        root = ET.fromstring(wrapped_text)
        data = {}
        for child in root:
            if child.text:
                value = child.text.strip().strip('"')
                data[child.tag.lower()] = value
        return data
    except ET.ParseError:
        return None


def extract_choice(text: str) -> str:
    """
    Extracts the choice (e.g., A), B), C), D)) from a text block.

    Parameters:
        text (str): Input text to search for the choice.
    Returns:
        str: The extracted choice, or None if not found.
    Example:
        >>> text = "A) This is a sample choice."
        >>> extract_choice(text)
        'A)'
    """
    choice_pattern = r'([A-D]\))'
    match = re.search(choice_pattern, text)
    if match:
        return match.group(1).strip()
    return None


def extract_justification(text: str) -> str:
    """
    Extracts the justification text from a text block.

    Parameters:
        text (str): Input text to search for the justification.
    Returns:
        str: The extracted justification, or None if not found.
    Example:
        >>> text = "- Justification: This is the reason."
        >>> extract_justification(text)
        'This is the reason.'
    """
    justification_pattern = r'(?:- )?Justification:\s*(.+)'
    match = re.search(justification_pattern, text)
    if match:
        return match.group(1).strip()
    return None


def extract_from_markdown_regex(text: str) -> dict:
    """
    Extracts structured data from Markdown-like text blocks.

    Parameters:
        text (str): Input text containing Markdown-like content, with **choice**
            and **justification** fields.
    Returns:
        dict: A dictionary containing "choice" and "justification", or None if no match is found.
    Example:
        >>> text = "**choice**: A **justification**: This is the reason."
        >>> extract_from_markdown_regex(text)
        {'choice': 'A', 'justification': 'This is the reason.'}
    """
    # A lookahead bounds the lazy capture at the justification marker or line end,
    # so the whole choice text is kept rather than a single character.
    choice_pattern = r'\*\*choice\*\*:\s*(.+?)(?=\s*\*\*justification\*\*|\n|$)'
    justification_pattern = r'\*\*justification\*\*:\s*([\s\S]+?)(?=\*\*choice\*\*|$)'
    choice_match = re.search(choice_pattern, text)
    justification_match = re.search(justification_pattern, text)

    if choice_match and justification_match:
        return {
            "choice": choice_match.group(1).strip(),
            "justification": justification_match.group(1).strip()
        }
    return None


def extract_fields(text: str) -> list:
    """
    Processes text blocks to extract structured data.

    This function attempts to parse each block using the following methods:
        1. XML Parsing: Uses extract_from_xml_et to handle XML-like content.
        2. Regex for Choice and Justification: Extracts these fields separately.
        3. Markdown Parsing: Uses extract_from_markdown_regex for Markdown-like structures.
    Parameters:
        text (str): Input text containing one or more blocks of data.
    Returns:
        list: A list of dictionaries, each containing extracted data from a block.
    Workflow:
        1. Splits the input text into blocks using double line breaks (\\n\\n).
        2. For each block:
            - Attempts to parse it using extract_from_xml_et.
            - If unsuccessful, tries extract_choice and extract_justification.
            - Finally, falls back to extract_from_markdown_regex.
        3. Aggregates the results into a list of dictionaries.
    Example:
        >>> text = '''
        ... <key>"value"</key>
        ...
        ... **choice**: A **justification**: This is the reason.
        ...
        ... A) Taking all reasonable measures to safeguard user data,
        ... - Justification: This is the reason.
        ... '''
        >>> extract_fields(text)
        [
            {'key': 'value'},
            {'choice': 'A', 'justification': 'This is the reason.'},
            {'choice': 'A)', 'justification': 'This is the reason.'}
        ]
    """
    entries = []
    blocks = re.split(r'\n\s*\n', text.strip())  # Split text into blocks by double newlines

    for block in blocks:
        print("Processing Block:", block)
        extracted_data = {}

        # Try extracting using XML
        xml_data = extract_from_xml_et(block)
        if xml_data:
            print("Extracted via XML:", xml_data)
            entries.append(xml_data)
            continue

        # Try extracting using separated choice and justification regex
        choice = extract_choice(block)
        justification = extract_justification(block)
        if choice or justification:
            extracted_data["choice"] = choice
            extracted_data["justification"] = justification
            entries.append(extracted_data)
            continue

        # Try extracting using Markdown regex
        markdown_data = extract_from_markdown_regex(block)
        if markdown_data:
            print("Extracted via Markdown Regex:", markdown_data)
            entries.append(markdown_data)

    return entries


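# Quick illustrative check of the extraction cascade (kept commented out, like the
# sample call near the bottom of this file); the sample text is hypothetical:
# sample = '<choice>"A"</choice>\n<justification>"This is the reason."</justification>'
# print(extract_fields(sample))  # expected: [{'choice': 'A', 'justification': 'This is the reason.'}]
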
### Initialize the LLM and tokenizer from the fine-tuned checkpoint located in the local unified_model directory.
model, tokenizer = FastLanguageModel.from_pretrained('./unified_model')


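# Note: the generation helpers below move their tokenized inputs to "cuda", so this
# app assumes a CUDA-capable GPU is available where the checkpoint is loaded.
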
def generate_response_true_false(instruction):
    """
    Generates a response using the fine-tuned model based on the provided instruction.

    This function enables faster inference through `FastLanguageModel` and prepares a
    prompt for the model to determine whether the given statement is "True" or "False".
    Args:
        instruction (str): A string containing the statement and instructions to be evaluated.
    Returns:
        str: "True" or "False" based on the model's response, or "Unable to determine" if the
            response cannot be parsed reliably.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Determine if the following statement is true or false. Respond only with "True" or "False".
### Statement:
{instruction}
### Answer:"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Answer:")[-1].strip()

    # Extract True/False from the response
    if response.lower() == "true":
        return "True"
    elif response.lower() == "false":
        return "False"
    else:
        # Try to identify the answer even if it's not perfectly formatted
        if "true" in response.lower():
            return "True"
        elif "false" in response.lower():
            return "False"
        else:
            return "Unable to determine."

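# Illustrative use (commented out; the statement below is only an example):
# print(generate_response_true_false("Phishing e-mails always come from unknown senders."))
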
def generate_response_open_ended(instruction):
    """
    Generates a response using the fine-tuned model based on the provided instruction.

    This function enables faster inference through `FastLanguageModel` and prepares a
    prompt asking the model to answer the provided open-ended question.
    Args:
        instruction (str): A string containing the question to be answered.
    Returns:
        str: The model's answer to the provided question.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Answer the provided question with the knowledge provided to you
### Question:
{instruction}
### Answer:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        # early_stopping and length_penalty only influence beam-search decoding;
        # with greedy decoding they are effectively no-ops.
        outputs = model.generate(**inputs, early_stopping=False, min_length=50, length_penalty=2, max_length=200)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[-1].strip()
    return response

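# Illustrative use (commented out; the question below is only an example):
# print(generate_response_open_ended("What obligations do banks have towards phishing victims?"))
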
def generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D):
    """
    Generates a response using the fine-tuned language model for multiple-choice questions.

    Args:
        question (str): The question to be answered.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.
    Returns:
        dict: A dictionary with the selected choice and its justification.
    Example:
        {
            "choice": "A",
            "justification": "Explanation for why Option A is correct."
        }
    If the model fails to provide a valid response, defaults to:
        {
            "choice": "None",
            "justification": "Could not parse JSON"
        }
    """
    instruction = f'''{question}
Choices:
A) {choice_A},
B) {choice_B},
C) {choice_C},
D) {choice_D}
'''

    # Enable native faster inference for the model
    FastLanguageModel.for_inference(model)

    # Define the prompt with a detailed instruction for the model
    prompt = f"""### Instruction:
In the following question, you are provided with 4 choices. Select the best choice based on the knowledge provided and provide a justification for that choice.
**You must return only your response with the following keys:**
- "choice": The best choice letter
- "justification": The justification for your choice
**Example Response:**
**choice**: A
**justification**: Explanation for why Option A is correct
### Question:
{instruction}
### Answer:

"""

    # Tokenize the prompt and move it to GPU for inference
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Generate a response from the model
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            early_stopping=True,
            min_length=50,
            length_penalty=2,
            do_sample=True,
            max_new_tokens=300,
            top_p=0.95,
            top_k=50,
            temperature=0.65,
            num_return_sequences=1
        )

    # Decode the response into text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[-1]
    print("RESPONSE", response)
    data = extract_fields(response)
    if len(data) == 0:
        # Fall back to the documented default when nothing could be extracted
        response = {"choice": "None", "justification": "Could not parse JSON"}
    else:
        # Use the last extracted block, which corresponds to the model's final answer
        response = {
            "choice": data[-1].get('choice', 'None'),
            "justification": data[-1].get('justification', 'Could not parse JSON')
        }
    return response

def true_false_greet(question):
    if question == "":
        # Return a default response if no input is given
        return "No question was given to answer"
    else:
        # Delegate to the true/false generation helper defined above
        response = generate_response_true_false(question)
        return f"{response}!"

def open_ended_greet(question):
    """
    Processes the user's question and returns a response.

    Args:
        question (str): The input text provided by the user.
    Returns:
        str: A processed response. If no input is given, a default message is returned.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    else:
        # Delegate to the open-ended generation helper defined above
        response = generate_response_open_ended(question)

        # Return the formatted response
        return f"{response}!"

def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
    """
    Processes the user's question and multiple-choice options to generate a response.

    Args:
        question (str): The input question provided by the user.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.
    Returns:
        str: A response based on the input.
            If no question is provided, returns a default message.
            If no choices are provided, returns a default message.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    if choice_A == "" and choice_B == "" and choice_C == "" and choice_D == "":
        # Return a default response if no choices are provided
        return "No choice was given"
    else:
        # Delegate to the multiple-choice generation helper defined above
        response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
        actual_response = "Selected Choice: " + response['choice'] + "\nJustification: " + response['justification']
        # Return the formatted response
        return f"{actual_response}"

#### Function which enables the visibility of the true/false questions interface
def show_true_false_interface():
    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

#### Function which enables the visibility of the open-ended questions interface
def show_open_ended_interface():
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

#### Function which enables the visibility of the multiple-choice questions interface
def show_multiple_choice_interface():
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

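# Each helper returns three gr.update objects in the order
# (true_false_interface, open_ended_interface, mc_interface), matching the
# outputs= lists wired up in the click handlers below.
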
# print(generate_response_multiple_choice("Which of the following best describes a bank’s legal duty in cases of phishing, according to Greek law?",
#                                         "Taking all reasonable measures to safeguard user data and transactions",
#                                         "Ensuring absolute prevention of all cyberattacks",
#                                         "Holding customers solely responsible for phishing losses",
#                                         "Avoiding liability by implementing disclaimers"
#                                         ))


with gr.Blocks() as demo:

    ### A row with the navigation buttons for each question type
    with gr.Row():
        btn_t_f = gr.Button('True/False questions')
        btn_open_ended = gr.Button('Open-Ended questions')
        btn_m_c = gr.Button('Multiple-Choice questions')

    ### Interface for the true/false questions
    with gr.Column(visible=True) as true_false_interface:
        gr.Markdown("## True-False Template")
        question_tf = gr.Textbox(label="Enter your question")
        tf_output = gr.Textbox(label="Output", interactive=False)
        submit_tf = gr.Button("Submit")
        submit_tf.click(true_false_greet, inputs=question_tf, outputs=tf_output)

    ### Interface for the open-ended questions
    with gr.Column(visible=False) as open_ended_interface:
        gr.Markdown("## Open Ended Template")
        question_open = gr.Textbox(label="Enter your question")
        open_output = gr.Textbox(label="Output", interactive=False)
        submit_open = gr.Button("Submit")
        submit_open.click(open_ended_greet, inputs=question_open, outputs=open_output)

    ### Interface for the multiple-choice questions
    with gr.Column(visible=False) as mc_interface:
        gr.Markdown("## Multiple-Choice Template")
        question_mc = gr.Textbox(label="Enter your question")
        choice_A = gr.Textbox(label="Choice A")
        choice_B = gr.Textbox(label="Choice B")
        choice_C = gr.Textbox(label="Choice C")
        choice_D = gr.Textbox(label="Choice D")
        mc_output = gr.Textbox(label="Output", interactive=False)
        submit_mc = gr.Button("Submit")
        submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)

    ### Clicking a navigation button toggles which interface is visible
    btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])

demo.launch()
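
# demo.launch() serves the app locally (or on the hosting Space); passing share=True
# would additionally create a temporary public Gradio link.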