QAway-to committed
Commit · b7e18a3 · 1 Parent(s): 288e23f
New version v1.6
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
-# === 1️⃣ Model
+# === 1️⃣ MBTI model ===
 MODEL_ID = "f3nsmart/MBTIclassifier"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
@@ -13,46 +13,61 @@ q_gen = pipeline(
     model="microsoft/Phi-3-mini-4k-instruct",
     temperature=0.6,
     top_p=0.9,
-    max_new_tokens=
+    max_new_tokens=80
 )
 
-# === 3️⃣
+# === 3️⃣ Interview ===
 def mbti_interview(user_input, history):
-    """
-    history: list of [(question, answer), ...]
-    """
-    # initialization
+    """Runs one step of the dialogue."""
     if history is None:
         history = []
+
     if not user_input.strip():
         return history, "⚠️ Введите ответ.", "Please describe yourself.", f"{len(history)}/30"
 
-    # ---
+    # --- MBTI analysis ---
    results = analyzer(user_input)[0]
     results = sorted(results, key=lambda x: x["score"], reverse=True)
     result_text = "\n".join([f"{r['label']} → {r['score']:.3f}" for r in results[:3]])
 
-    # ---
+    # --- History ---
     last_q = history[-1][0] if history else "Initial question"
     history.append((last_q, user_input))
     progress = f"{len(history)}/30"
 
-    # --- context of the last 5 pairs ---
+    # --- Build the context of the last 5 pairs ---
     convo = "\n".join([f"Q: {q}\nA: {a}" for q, a in history[-5:]])
 
-    # ---
+    # --- Questions already asked ---
+    prev_qs = [q for q, _ in history]
+
+    # --- New prompt ---
     prompt = (
-        "You are an HR interviewer
-        "
-        "
-
+        "You are an HR interviewer performing an MBTI personality assessment. "
+        "You have already asked these questions:\n"
+        + "\n".join(f"- {q}" for q in prev_qs[-10:]) + "\n\n"
+        "Based on the candidate’s latest answer, generate ONE new question that:\n"
+        "• Is unique and not identical to previous ones.\n"
+        "• Is short (under 20 words).\n"
+        "• Is polite and natural.\n"
+        "• Helps understand the person’s motivation or preferences.\n\n"
+        f"Recent dialogue:\n{convo}\n\nNext question:"
     )
 
     try:
         raw = q_gen(prompt)[0]["generated_text"]
-
+        # --- cleanup ---
+        raw = raw.replace("\n", " ").strip()
+        next_q = raw.split("?")[0]
+        next_q = next_q.split("Next question:")[-1].strip().capitalize() + "?"
+
+        # --- duplicate filter ---
+        if any(next_q.lower() == q.lower() for q in prev_qs):
+            next_q = "What do you value most when collaborating with others?"
+
         if len(next_q.split()) < 4 or len(next_q) > 140:
             next_q = "What motivates you the most when you start something new?"
+
     except Exception as e:
         next_q = f"(⚠️ Ошибка генерации вопроса: {e})"
 
@@ -66,9 +81,9 @@ with gr.Blocks(css="""
 #progress {text-align: center; font-weight: bold; color: #4CAF50; font-size: 18px;}
 textarea {height: 100px !important;}
 """) as demo:
-    gr.Markdown("## 🧠 Adaptive MBTI Classifier\
+    gr.Markdown("## 🧠 Adaptive MBTI Classifier\n### Context-aware interviewer with memory and unique questions.")
 
-    state = gr.State([])
+    state = gr.State([])
 
     question = gr.Textbox(
         label="Вопрос",
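The diff shows only the tail of the `q_gen = pipeline(` call and never shows how `analyzer` is built, yet `mbti_interview` indexes `analyzer(user_input)[0]` as a list of label/score dicts. A minimal sketch of how the two pipelines are presumably constructed; the task names and `return_all_scores=True` are assumptions inferred from that indexing, not part of the commit:

# Sketch (assumption): pipeline definitions app.py relies on but the diff
# does not show. Names and tasks are inferred from the visible call sites.
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

MODEL_ID = "f3nsmart/MBTIclassifier"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)

# analyzer(user_input)[0] is iterated as [{"label": ..., "score": ...}, ...],
# which matches the legacy all-scores output shape (assumption).
analyzer = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,  # assumption; newer code would pass top_k=None
)

# Visible tail of the generator call; the "text-generation" task is assumed,
# consistent with reading q_gen(prompt)[0]["generated_text"] later.
q_gen = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    temperature=0.6,
    top_p=0.9,
    max_new_tokens=80,
)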
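The new post-processing in the try block is order-sensitive: cut at the first "?", then take what follows the trailing "Next question:" marker, then fall back on duplicates or malformed output. A standalone sketch, assuming the generator echoes the prompt (the default return_full_text=True), so the question sits between the marker and the first "?" of the continuation:

# Standalone sketch of the question post-processing from the hunk above.
def clean_question(raw: str, prev_qs: list) -> str:
    raw = raw.replace("\n", " ").strip()
    next_q = raw.split("?")[0]  # keep everything before the first "?"
    next_q = next_q.split("Next question:")[-1].strip().capitalize() + "?"

    # duplicate filter: swap in a stock question if the model repeats itself
    if any(next_q.lower() == q.lower() for q in prev_qs):
        next_q = "What do you value most when collaborating with others?"

    # length guard: very short or very long output means a broken generation
    if len(next_q.split()) < 4 or len(next_q) > 140:
        next_q = "What motivates you the most when you start something new?"
    return next_q

# Hypothetical echoed prompt plus continuation:
raw = "You are an HR interviewer ... Next question: how do you recharge after a busy week? Sure!"
print(clean_question(raw, prev_qs=[]))
# -> "How do you recharge after a busy week?"

Note that on later turns the prompt itself embeds earlier questions ending in "?", so the first "?" can land inside that list; the over-long extraction then trips the length guard and the stock fallback question is asked instead.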
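The final hunk shows only the top of the Blocks layout (`state` and the `question` textbox). A sketch of how `mbti_interview` is presumably wired to the UI, inferred from its four return values (history, analysis text, next question, progress); every component name below besides `state` and `question` is hypothetical, and a one-line stub stands in for the real function so the sketch runs standalone:

import gradio as gr

def mbti_interview(user_input, history):  # stub standing in for the real function
    history = (history or []) + [("Initial question", user_input)]
    return history, "ISTJ → 0.512", "What motivates you?", f"{len(history)}/30"

with gr.Blocks() as demo:
    state = gr.State([])
    question = gr.Textbox(label="Вопрос")               # next question to answer
    answer = gr.Textbox(label="Ответ")                  # hypothetical input box
    analysis = gr.Textbox(label="MBTI")                 # hypothetical scores box
    progress = gr.Markdown("0/30", elem_id="progress")  # styled by the #progress CSS rule
    send = gr.Button("Send")                            # hypothetical

    # outputs follow the function's return order:
    # (history, result_text, next_q, progress)
    send.click(mbti_interview, inputs=[answer, state],
               outputs=[state, analysis, question, progress])

demo.launch()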
train.py DELETED
@@ -1,29 +0,0 @@
-from datasets import load_dataset
-from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
-
-MODEL = "distilgpt2"
-data = load_dataset("json", data_files={"train": "data/train.jsonl"})
-tok = AutoTokenizer.from_pretrained(MODEL)
-tok.pad_token = tok.eos_token
-
-def prep(ex):
-    text = f"### Instruction:\n{ex['instruction']}\n### Response:\n{ex['output']}"
-    enc = tok(text, truncation=True, padding="max_length", max_length=256)
-    enc["labels"] = enc["input_ids"].copy()
-    return enc
-
-ds = data["train"].map(prep)
-model = AutoModelForCausalLM.from_pretrained(MODEL)
-
-args = TrainingArguments(
-    output_dir="ft_model",
-    num_train_epochs=2,
-    per_device_train_batch_size=1,
-    logging_steps=10,
-    save_strategy="epoch",
-)
-
-trainer = Trainer(model=model, args=args, train_dataset=ds)
-trainer.train()
-trainer.save_model("ft_model")
-tok.save_pretrained("ft_model")
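For context, the deleted script fine-tuned distilgpt2 on instruction/response pairs. A minimal sketch of what its `prep` produced for one record; the record values are dummy data, and the field names follow the `data/train.jsonl` schema the script expects:

# Sketch: one record through the deleted train.py's prep() (dummy data).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("distilgpt2")
tok.pad_token = tok.eos_token  # GPT-2 has no pad token by default

ex = {"instruction": "Describe yourself.", "output": "I enjoy solving problems."}
text = f"### Instruction:\n{ex['instruction']}\n### Response:\n{ex['output']}"
enc = tok(text, truncation=True, padding="max_length", max_length=256)
enc["labels"] = enc["input_ids"].copy()

# labels are an unmasked copy of input_ids, so the LM loss also covers the
# instruction text and the EOS padding, a simplification the script accepted.
print(len(enc["input_ids"]))  # 256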