QAway-to committed on
Commit b7e18a3 · 1 parent: 288e23f

New version v1.6

Files changed (2)
  1. app.py +33 -18
  2. train.py +0 -29
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
-# === 1️⃣ MBTI analysis model ===
+# === 1️⃣ MBTI model ===
 MODEL_ID = "f3nsmart/MBTIclassifier"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
@@ -13,46 +13,61 @@ q_gen = pipeline(
     model="microsoft/Phi-3-mini-4k-instruct",
     temperature=0.6,
     top_p=0.9,
-    max_new_tokens=60
+    max_new_tokens=80
 )
 
-# === 3️⃣ Function for one interview step ===
+# === 3️⃣ Interview ===
 def mbti_interview(user_input, history):
-    """
-    history — list of [(question, answer), ...]
-    """
-    # initialization
+    """Performs one dialogue step"""
     if history is None:
         history = []
+
     if not user_input.strip():
         return history, "⚠️ Please enter an answer.", "Please describe yourself.", f"{len(history)}/30"
 
-    # --- analysis ---
+    # --- MBTI analysis ---
     results = analyzer(user_input)[0]
     results = sorted(results, key=lambda x: x["score"], reverse=True)
     result_text = "\n".join([f"{r['label']} → {r['score']:.3f}" for r in results[:3]])
 
-    # --- update the history ---
+    # --- History ---
     last_q = history[-1][0] if history else "Initial question"
     history.append((last_q, user_input))
    progress = f"{len(history)}/30"
 
-    # --- context of the last 5 pairs ---
+    # --- Build context from the last 5 pairs ---
     convo = "\n".join([f"Q: {q}\nA: {a}" for q, a in history[-5:]])
 
-    # --- query the interviewer model ---
+    # --- List of questions already asked ---
+    prev_qs = [q for q, _ in history]
+
+    # --- New prompt ---
     prompt = (
-        "You are an HR interviewer conducting an MBTI personality assessment. "
-        "Based on the following recent dialogue, generate ONE new, natural, short question. "
-        "Avoid repeating any previous questions.\n\n"
-        f"{convo}\n\nNext question:"
+        "You are an HR interviewer performing an MBTI personality assessment. "
+        "You have already asked these questions:\n"
+        + "\n".join(f"- {q}" for q in prev_qs[-10:]) + "\n\n"
+        "Based on the candidate’s latest answer, generate ONE new question that:\n"
+        "• Is unique and not identical to previous ones.\n"
+        "• Is short (under 20 words).\n"
+        "• Is polite and natural.\n"
+        "• Helps understand the person’s motivation or preferences.\n\n"
+        f"Recent dialogue:\n{convo}\n\nNext question:"
     )
 
     try:
         raw = q_gen(prompt)[0]["generated_text"]
-        next_q = raw.split("?")[0].split("\n")[-1].strip().capitalize() + "?"
+        # --- cleanup ---
+        raw = raw.replace("\n", " ").strip()
+        next_q = raw.split("?")[0]
+        next_q = next_q.split("Next question:")[-1].strip().capitalize() + "?"
+
+        # --- duplicate filter ---
+        if any(next_q.lower() == q.lower() for q in prev_qs):
+            next_q = "What do you value most when collaborating with others?"
+
         if len(next_q.split()) < 4 or len(next_q) > 140:
             next_q = "What motivates you the most when you start something new?"
+
     except Exception as e:
         next_q = f"(⚠️ Question generation error: {e})"
 
@@ -66,9 +81,9 @@ with gr.Blocks(css="""
     #progress {text-align: center; font-weight: bold; color: #4CAF50; font-size: 18px;}
     textarea {height: 100px !important;}
 """) as demo:
-    gr.Markdown("## 🧠 Adaptive MBTI Classifier\nDynamic conversation with context memory")
+    gr.Markdown("## 🧠 Adaptive MBTI Classifier\n### Context-aware interviewer with memory and unique questions.")
 
-    state = gr.State([])  # stores [(question, answer)] between iterations
+    state = gr.State([])
 
     question = gr.Textbox(
         label="Question",
train.py DELETED
@@ -1,29 +0,0 @@
-from datasets import load_dataset
-from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
-
-MODEL = "distilgpt2"
-data = load_dataset("json", data_files={"train": "data/train.jsonl"})
-tok = AutoTokenizer.from_pretrained(MODEL)
-tok.pad_token = tok.eos_token
-
-def prep(ex):
-    text = f"### Instruction:\n{ex['instruction']}\n### Response:\n{ex['output']}"
-    enc = tok(text, truncation=True, padding="max_length", max_length=256)
-    enc["labels"] = enc["input_ids"].copy()
-    return enc
-
-ds = data["train"].map(prep)
-model = AutoModelForCausalLM.from_pretrained(MODEL)
-
-args = TrainingArguments(
-    output_dir="ft_model",
-    num_train_epochs=2,
-    per_device_train_batch_size=1,
-    logging_steps=10,
-    save_strategy="epoch",
-)
-
-trainer = Trainer(model=model, args=args, train_dataset=ds)
-trainer.train()
-trainer.save_model("ft_model")
-tok.save_pretrained("ft_model")
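
The deleted script saved its checkpoint and tokenizer to ft_model/. For completeness, a minimal sketch of how that checkpoint would be loaded for generation with the standard Transformers API, reusing the Instruction/Response template from the deleted prep function (this snippet is illustrative and not part of the commit):

from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the artifacts train.py used to write to ft_model/.
tok = AutoTokenizer.from_pretrained("ft_model")
model = AutoModelForCausalLM.from_pretrained("ft_model")

prompt = "### Instruction:\nDescribe yourself.\n### Response:\n"
inputs = tok(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=60, pad_token_id=tok.eos_token_id)
print(tok.decode(out[0], skip_special_tokens=True))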