H2H-eval-comparator

Sleeping

App Files Files Community

rohansampath committed on Feb 11

Commit

614dffd

verified ·

1 Parent(s): 9775899

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -23

app.py CHANGED Viewed

@@ -4,12 +4,13 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import evaluate
 import re
 import matplotlib
-matplotlib.use('Agg')  # for non-interactive envs
 import matplotlib.pyplot as plt
 import io
 import base64
 import os
 from huggingface_hub import login
 # Read token and login
 hf_token = os.getenv("HF_TOKEN_READ_WRITE")
@@ -18,28 +19,26 @@ if hf_token:
 else:
     print("⚠️ No HF_TOKEN_READ_WRITE found in environment")
-# Check GPU availability
-if torch.cuda.is_available():
-    print("✅ GPU is available")
-    print("GPU Name:", torch.cuda.get_device_name(0))
-else:
-    print("❌ No GPU available")
 # ---------------------------------------------------------------------------
-# 1. Define model name and load model/tokenizer
 # ---------------------------------------------------------------------------
 model_name = "mistralai/Mistral-7B-Instruct-v0.3"
-tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    token=hf_token,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
-print(f"✅ Model loaded on {device}")
 # ---------------------------------------------------------------------------
 # 2. Test dataset
@@ -58,14 +57,17 @@ accuracy_metric = evaluate.load("accuracy")
 # ---------------------------------------------------------------------------
 # 4. Inference helper functions
 # ---------------------------------------------------------------------------
 def generate_answer(question):
     """
     Generates an answer using Mistral's instruction format.
     """
     # Mistral instruction format
     prompt = f"""<s>[INST] {question} [/INST]"""
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -91,6 +93,7 @@ def parse_answer(model_output):
 # ---------------------------------------------------------------------------
 # 5. Evaluation routine
 # ---------------------------------------------------------------------------
 def run_evaluation():
     predictions = []
     references = []
@@ -125,10 +128,10 @@ def run_evaluation():
     accuracy = results["accuracy"]
     # Create visualization
     correct_count = sum(p == r for p, r in zip(norm_preds, norm_refs))
     incorrect_count = len(test_data) - correct_count
-    fig, ax = plt.subplots(figsize=(8, 6))
     bars = ax.bar(["Correct", "Incorrect"],
                  [correct_count, incorrect_count],
                  color=["#2ecc71", "#e74c3c"])
@@ -142,7 +145,7 @@ def run_evaluation():
     ax.set_title("Evaluation Results")
     ax.set_ylabel("Count")
-    ax.set_ylim([0, len(test_data) + 0.5])  # Add some padding at top
     # Convert plot to base64
     buf = io.BytesIO()
@@ -176,7 +179,6 @@ def run_evaluation():
     details_html += "</table></div>"
-    # Combine plot and details
     full_html = f"""
     <div>
         <img src="data:image/png;base64,{data}" style="width:100%; max-width:600px;">

 import evaluate
 import re
 import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import io
 import base64
 import os
 from huggingface_hub import login
+import spaces
 # Read token and login
 hf_token = os.getenv("HF_TOKEN_READ_WRITE")
 else:
     print("⚠️ No HF_TOKEN_READ_WRITE found in environment")
 # ---------------------------------------------------------------------------
+# 1. Model and tokenizer setup
 # ---------------------------------------------------------------------------
 model_name = "mistralai/Mistral-7B-Instruct-v0.3"
+tokenizer = None
+model = None
+@spaces.GPU
+def load_model():
+    global tokenizer, model
+    if tokenizer is None:
+        tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+    if model is None:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            token=hf_token,
+            torch_dtype=torch.float16
+        )
+        model.to('cuda')
+    return model, tokenizer
 # ---------------------------------------------------------------------------
 # 2. Test dataset
 # ---------------------------------------------------------------------------
 # 4. Inference helper functions
 # ---------------------------------------------------------------------------
+@spaces.GPU
 def generate_answer(question):
     """
     Generates an answer using Mistral's instruction format.
     """
+    model, tokenizer = load_model()
     # Mistral instruction format
     prompt = f"""<s>[INST] {question} [/INST]"""
+    inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
 # ---------------------------------------------------------------------------
 # 5. Evaluation routine
 # ---------------------------------------------------------------------------
+@spaces.GPU(duration=120)  # Allow up to 2 minutes for full evaluation
 def run_evaluation():
     predictions = []
     references = []
     accuracy = results["accuracy"]
     # Create visualization
+    fig, ax = plt.subplots(figsize=(8, 6))
     correct_count = sum(p == r for p, r in zip(norm_preds, norm_refs))
     incorrect_count = len(test_data) - correct_count
     bars = ax.bar(["Correct", "Incorrect"],
                  [correct_count, incorrect_count],
                  color=["#2ecc71", "#e74c3c"])
     ax.set_title("Evaluation Results")
     ax.set_ylabel("Count")
+    ax.set_ylim([0, len(test_data) + 0.5])
     # Convert plot to base64
     buf = io.BytesIO()
     details_html += "</table></div>"
     full_html = f"""
     <div>
         <img src="data:image/png;base64,{data}" style="width:100%; max-width:600px;">