from unsloth import FastLanguageModel
import torch
import gradio as gr
import xml.etree.ElementTree as ET
import re

"""
This module provides utilities for extracting structured data from text blocks.
It supports parsing XML-like structures, Markdown-like formatting, and alternative
text representations for extracting "choice" and "justification" fields.

Functions:
    extract_from_xml_et(text: str) -> dict
        Parses XML-like text and extracts key-value pairs.
    extract_choice(text: str) -> str
        Extracts the choice (e.g., A), B), C), D)) from a text block.
    extract_justification(text: str) -> str
        Extracts the justification text from a text block.
    extract_from_markdown_regex(text: str) -> dict
        Extracts data from Markdown-like structured text, specifically the "choice"
        and "justification" fields.
    extract_fields(text: str) -> list
        Processes text blocks to extract structured data using a combination of
        XML parsing, regex-based choice and justification extraction, and
        Markdown-like parsing.
"""

def extract_from_xml_et(text: str) -> dict:
    """
    Parses an XML-like string and extracts key-value pairs from its elements.

    Parameters:
        text (str): A string containing XML-like content (e.g., <tag>value</tag>).

    Returns:
        dict: A dictionary where the keys are lowercase XML tags and the values
              are their corresponding text content.
        None: Returned if XML parsing fails.

    Example:
        >>> text = '<key>"value"</key>'
        >>> extract_from_xml_et(text)
        {'key': 'value'}
    """
    try:
        wrapped_text = f"<root>{text}</root>"
        root = ET.fromstring(wrapped_text)
        data = {}
        for child in root:
            if child.text:
                value = child.text.strip().strip('"')
                data[child.tag.lower()] = value
        return data
    except ET.ParseError:
        return None

def extract_choice(text: str) -> str:
    """
    Extracts the choice (e.g., A), B), C), D)) from a text block.

    Parameters:
        text (str): Input text to search for the choice.

    Returns:
        str: The extracted choice, or None if not found.

    Example:
        >>> text = "A) This is a sample choice."
        >>> extract_choice(text)
        'A)'
    """
    choice_pattern = r'([A-D]\))'
    match = re.search(choice_pattern, text)
    if match:
        return match.group(1).strip()
    return None

def extract_justification(text: str) -> str:
    """
    Extracts the justification text from a text block.

    Parameters:
        text (str): Input text to search for the justification.

    Returns:
        str: The extracted justification, or None if not found.

    Example:
        >>> text = "- Justification: This is the reason."
        >>> extract_justification(text)
        'This is the reason.'
    """
    justification_pattern = r'(?:- )?Justification:\s*(.+)'
    match = re.search(justification_pattern, text)
    if match:
        return match.group(1).strip()
    return None

def extract_from_markdown_regex(text: str) -> dict:
    """
    Extracts structured data from Markdown-like text blocks.

    Parameters:
        text (str): Input text containing Markdown-like content, with **choice**
                    and **justification** fields.

    Returns:
        dict: A dictionary containing "choice" and "justification", or None if no match is found.

    Example:
        >>> text = "**choice**: A **justification**: This is the reason."
        >>> extract_from_markdown_regex(text)
        {'choice': 'A', 'justification': 'This is the reason.'}
    """
    # Capture the choice up to the next **justification** marker, a newline, or the end
    # of the text; a bare lazy "(.+?)" with nothing after it only ever matches one character.
    choice_pattern = r'\*\*choice\*\*:\s*(.+?)(?=\s*\*\*justification\*\*|\n|$)'
    justification_pattern = r'\*\*justification\*\*:\s*([\s\S]+?)(?=\*\*choice\*\*|$)'
    choice_match = re.search(choice_pattern, text)
    justification_match = re.search(justification_pattern, text)
    if choice_match and justification_match:
        return {
            "choice": choice_match.group(1).strip(),
            "justification": justification_match.group(1).strip()
        }
    return None

def extract_fields(text: str) -> list:
    """
    Processes text blocks to extract structured data.

    This function attempts to parse each block using the following methods:
        1. XML Parsing: Uses extract_from_xml_et to handle XML-like content.
        2. Regex for Choice and Justification: Extracts these fields separately.
        3. Markdown Parsing: Uses extract_from_markdown_regex for Markdown-like structures.

    Parameters:
        text (str): Input text containing one or more blocks of data.

    Returns:
        list: A list of dictionaries, each containing extracted data from a block.

    Workflow:
        1. Splits the input text into blocks using double line breaks (blank lines).
        2. For each block:
            - Attempts to parse it using extract_from_xml_et.
            - If unsuccessful, tries extract_choice and extract_justification.
            - Finally, falls back to extract_from_markdown_regex.
        3. Aggregates the results into a list of dictionaries.

    Example:
        >>> text = '''
        <key>"value"</key>

        **choice**: A **justification**: This is the reason.

        A) Taking all reasonable measures to safeguard user data,
        - Justification: This is the reason.
        '''
        >>> extract_fields(text)
        [
            {'key': 'value'},
            {'choice': 'A', 'justification': 'This is the reason.'},
            {'choice': 'A)', 'justification': 'This is the reason.'}
        ]
    """
    entries = []
    blocks = re.split(r'\n\s*\n', text.strip())  # Split text into blocks by double newlines
    for block in blocks:
        print("Processing Block:", block)
        extracted_data = {}

        # Try extracting using XML
        xml_data = extract_from_xml_et(block)
        if xml_data:
            print("Extracted via XML:", xml_data)
            entries.append(xml_data)
            continue

        # Try extracting using separated choice and justification regex
        choice = extract_choice(block)
        justification = extract_justification(block)
        if choice or justification:
            extracted_data["choice"] = choice
            extracted_data["justification"] = justification
            entries.append(extracted_data)
            continue

        # Try extracting using Markdown regex
        markdown_data = extract_from_markdown_regex(block)
        if markdown_data:
            print("Extracted via Markdown Regex:", markdown_data)
            entries.append(markdown_data)
    return entries
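
# Illustrative (commented-out) smoke test for the extraction fallback order, kept as
# comments so nothing runs at import time. The sample blocks below are assumptions about
# the kinds of model output seen in practice, not fixtures from this project.
# sample = '<CHOICE>"B)"</CHOICE>\n\n**choice**: A **justification**: Sampling reason.\n\nC) Some option text.\n- Justification: Regex reason.'
# print(extract_fields(sample))
# # Expected shape: [{'choice': 'B)'},
# #                  {'choice': 'A', 'justification': 'Sampling reason.'},
# #                  {'choice': 'C)', 'justification': 'Regex reason.'}]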

### Initialize the LLM and tokenizer from a fine-tuned checkpoint stored in the local unified_model directory.
model, tokenizer = FastLanguageModel.from_pretrained('./unified_model')
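
# Note: depending on how the checkpoint was saved, FastLanguageModel.from_pretrained can
# also take explicit loading options. The call below is an illustrative (commented-out)
# variant, not part of the running app; the values shown are assumptions about this checkpoint.
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name="./unified_model",
#     max_seq_length=2048,   # assumed context length used during fine-tuning
#     load_in_4bit=True,     # assumed quantized loading to fit on a single GPU
# )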

def generate_response_true_false(instruction):
    """
    Generates a response using your fine-tuned model based on the provided instruction.

    This function enables faster inference through the `FastLanguageModel` and prepares a
    prompt for the model to determine whether the given statement is "True" or "False".

    Args:
        instruction (str): A string containing the statement and instructions to be evaluated.

    Returns:
        str: "True" or "False" based on the model's response, or "Unable to determine" if the
             response cannot be parsed reliably.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Determine if the following statement is true or false. Respond only with "True" or "False".
### Statement:
{instruction}
### Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Answer:")[-1].strip()

    # Extract True/False from the response
    if response.lower() == "true":
        return "True"
    elif response.lower() == "false":
        return "False"
    else:
        # Try to identify the answer even if it's not perfectly formatted
        if "true" in response.lower():
            return "True"
        elif "false" in response.lower():
            return "False"
        else:
            return "Unable to determine."

def generate_response_open_ended(instruction):
    """
    Generates a response using your fine-tuned model based on the provided instruction.

    This function enables faster inference through the `FastLanguageModel` and prepares a
    prompt that asks the model to answer the given open-ended question.

    Args:
        instruction (str): A string containing the question to be answered.

    Returns:
        str: The model's answer to the provided question.
    """
    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
    prompt = f"""### Instruction:
Answer the provided question with the knowledge provided to you
### Question:
{instruction}
### Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        outputs = model.generate(**inputs, early_stopping=False, min_length=50, length_penalty=2, max_length=200)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[1]
    return response
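
# Note: `max_length` above counts the prompt tokens as well, so a long question leaves
# less room for the answer. A minimal alternative (an assumption, not the configuration
# this app was tuned with) would budget the new tokens explicitly:
# outputs = model.generate(**inputs, min_new_tokens=50, max_new_tokens=200)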

def generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D):
    """
    Generates a response using a fine-tuned language model for multiple-choice questions.

    Args:
        question (str): The question to be answered.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.

    Returns:
        dict: A dictionary with the selected choice and its justification.

    Example:
        {
            "choice": "A",
            "justification": "Explanation for why Option A is correct."
        }

    If the model fails to provide a valid response, defaults to:
        {
            "choice": "None",
            "justification": "Could not parse JSON"
        }
    """
    instruction = f'''{question}
Choices:
A) {choice_A},
B) {choice_B},
C) {choice_C},
D) {choice_D}
'''
    # Enable native faster inference for the model
    FastLanguageModel.for_inference(model)
    # Define the prompt with a detailed instruction for the model
    prompt = f"""### Instruction:
In the following question, you are provided with 4 choices. Select the best choice based on the knowledge provided and provide a justification for that choice.
**You must return only your response with the following keys:**
- "choice": The best choice letter
- "justification": The justification for your choice
**Example Response:**
**choice**: A
**justification**: Explanation for why Option A is correct
### Question:
{instruction}
### Answer:
"""
    # Tokenize the prompt and move it to GPU for inference
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Generate a response from the model
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            early_stopping=True,
            min_length=50,
            length_penalty=2,
            do_sample=True,
            max_new_tokens=300,
            top_p=0.95,
            top_k=50,
            temperature=0.65,
            num_return_sequences=1
        )
    # Decode the response into text
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the answer from the generated response by splitting on "### Answer:"
    response = response.split('### Answer:')[1]
    print("RESPONSE", response)
    data = extract_fields(response)
    # Fall back to the documented default when nothing could be parsed;
    # otherwise keep the last extracted block.
    if len(data) == 0:
        response = {"choice": "None", "justification": "Could not parse JSON"}
    else:
        response = {"choice": data[-1]['choice'], "justification": data[-1]['justification']}
    return response

def true_false_greet(question):
    if question == "":
        # Return a default response if no input is given
        return "No question was given to answer"
    else:
        # Generate a True/False answer with the fine-tuned model
        response = generate_response_true_false(question)
        return f"{response}!"

def open_ended_greet(question):
    """
    Processes the user's question and returns a response.

    Args:
        question (str): The input text provided by the user.

    Returns:
        str: A processed response. If no input is given, a default message is returned.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    else:
        # Generate an open-ended answer with the fine-tuned model
        response = generate_response_open_ended(question)
        # Return the formatted response
        return f"{response}!"

def multiple_choice_greet(question, choice_A, choice_B, choice_C, choice_D):
    """
    Processes the user's question and multiple-choice options to generate a response.

    Args:
        question (str): The input question provided by the user.
        choice_A (str): Option A for the question.
        choice_B (str): Option B for the question.
        choice_C (str): Option C for the question.
        choice_D (str): Option D for the question.

    Returns:
        str: A response based on the input.
             If no question is provided, returns a default message.
             If no choices are provided, returns a default message.
    """
    if question == "":
        # Return a default response if no question is provided
        return "No question was given to answer"
    if choice_A == "" and choice_B == "" and choice_C == "" and choice_D == "":
        # Return a default response if no choices are provided
        return "No choice was given"
    else:
        # Generate a choice and justification with the fine-tuned model
        response = generate_response_multiple_choice(question, choice_A, choice_B, choice_C, choice_D)
        actual_response = "Selected Choice: " + response['choice'] + "\nJustification: " + response['justification']
        # Return the formatted response
        return f"{actual_response}"

#### Function which enables the visibility of the true/false questions interface
def show_true_false_interface():
    return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

#### Function which enables the visibility of the open-ended questions interface
def show_open_ended_interface():
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

#### Function which enables the visibility of the multiple-choice questions interface
def show_multiple_choice_interface():
    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
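
# Each function above returns three gr.update(...) objects; Gradio maps them positionally
# onto the outputs list [true_false_interface, open_ended_interface, mc_interface] passed
# to the .click() handlers at the bottom of the Blocks definition below.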

# print(generate_response_multiple_choice("Which of the following best describes a bank’s legal duty in cases of phishing, according to Greek law?",
#                                         "Taking all reasonable measures to safeguard user data and transactions",
#                                         "Ensuring absolute prevention of all cyberattacks",
#                                         "Holding customers solely responsible for phishing losses",
#                                         "Avoiding liability by implementing disclaimers"
#                                         ))
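
# Illustrative (commented-out) calls for the other two question types, following the same
# pattern as the multiple-choice test above; the example questions are placeholders.
# print(generate_response_true_false("Banks in Greece are never liable for phishing losses."))
# print(generate_response_open_ended("What duties does a bank have towards its customers in cases of phishing?"))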

with gr.Blocks() as demo:
    ### We define a row in which we create the navigation buttons for each question type
    with gr.Row():
        btn_t_f = gr.Button('True/False questions')
        btn_open_ended = gr.Button('Open-Ended questions')
        btn_m_c = gr.Button('Multiple-Choice questions')

    ### We define the interface for the true/false questions
    with gr.Column(visible=True) as true_false_interface:
        gr.Markdown("## True-False Template")
        question_simple = gr.Textbox(label="Enter your question")
        simple_output = gr.Textbox(label="Output", interactive=False)
        submit_simple = gr.Button("Submit")
        submit_simple.click(true_false_greet, inputs=question_simple, outputs=simple_output)

    ### We define the interface for the open-ended questions
    with gr.Column(visible=False) as open_ended_interface:
        gr.Markdown("## Open Ended Template")
        question_simple = gr.Textbox(label="Enter your question")
        simple_output = gr.Textbox(label="Output", interactive=False)
        submit_simple = gr.Button("Submit")
        submit_simple.click(open_ended_greet, inputs=question_simple, outputs=simple_output)

    ### We define the interface for the multiple-choice questions
    with gr.Column(visible=False) as mc_interface:
        gr.Markdown("## Multiple-Choice Template")
        question_mc = gr.Textbox(label="Enter your question")
        choice_A = gr.Textbox(label="Choice A")
        choice_B = gr.Textbox(label="Choice B")
        choice_C = gr.Textbox(label="Choice C")
        choice_D = gr.Textbox(label="Choice D")
        mc_output = gr.Textbox(label="Output", interactive=False)
        submit_mc = gr.Button("Submit")
        submit_mc.click(multiple_choice_greet, inputs=[question_mc, choice_A, choice_B, choice_C, choice_D], outputs=mc_output)

    ### If a navigation button is clicked, the corresponding visibility function is executed
    btn_t_f.click(show_true_false_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_open_ended.click(show_open_ended_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])
    btn_m_c.click(show_multiple_choice_interface, outputs=[true_false_interface, open_ended_interface, mc_interface])

demo.launch()