QAway-to committed on
Commit b7e18a3 · 1 parent: 288e23f

New version v1.6

Files changed (2)
  1. app.py +33 -18
  2. train.py +0 -29
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
-# === 1️⃣ MBTI analysis model ===
+# === 1️⃣ MBTI model ===
 MODEL_ID = "f3nsmart/MBTIclassifier"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
@@ -13,46 +13,61 @@ q_gen = pipeline(
     model="microsoft/Phi-3-mini-4k-instruct",
     temperature=0.6,
     top_p=0.9,
-    max_new_tokens=60
+    max_new_tokens=80
 )
 
-# === 3️⃣ Function for one interview step ===
+# === 3️⃣ Interview ===
 def mbti_interview(user_input, history):
-    """
-    history — list of [(question, answer), ...]
-    """
-    # initialization
+    """Performs one dialogue step"""
     if history is None:
         history = []
+
     if not user_input.strip():
         return history, "⚠️ Please enter an answer.", "Please describe yourself.", f"{len(history)}/30"
 
-    # --- analysis ---
+    # --- MBTI analysis ---
     results = analyzer(user_input)[0]
     results = sorted(results, key=lambda x: x["score"], reverse=True)
     result_text = "\n".join([f"{r['label']} → {r['score']:.3f}" for r in results[:3]])
 
-    # --- update the history ---
+    # --- History ---
     last_q = history[-1][0] if history else "Initial question"
     history.append((last_q, user_input))
    progress = f"{len(history)}/30"
 
-    # --- context of the last 5 pairs ---
+    # --- Build context from the last 5 pairs ---
     convo = "\n".join([f"Q: {q}\nA: {a}" for q, a in history[-5:]])
 
-    # --- query the interviewer model ---
+    # --- List of questions already asked ---
+    prev_qs = [q for q, _ in history]
+
+    # --- New prompt ---
     prompt = (
-        "You are an HR interviewer conducting an MBTI personality assessment. "
-        "Based on the following recent dialogue, generate ONE new, natural, short question. "
-        "Avoid repeating any previous questions.\n\n"
-        f"{convo}\n\nNext question:"
+        "You are an HR interviewer performing an MBTI personality assessment. "
+        "You have already asked these questions:\n"
+        + "\n".join(f"- {q}" for q in prev_qs[-10:]) + "\n\n"
+        "Based on the candidate’s latest answer, generate ONE new question that:\n"
+        "• Is unique and not identical to previous ones.\n"
+        "• Is short (under 20 words).\n"
+        "• Is polite and natural.\n"
+        "• Helps understand the person’s motivation or preferences.\n\n"
+        f"Recent dialogue:\n{convo}\n\nNext question:"
     )
 
     try:
         raw = q_gen(prompt)[0]["generated_text"]
-        next_q = raw.split("?")[0].split("\n")[-1].strip().capitalize() + "?"
+        # --- cleanup ---
+        raw = raw.replace("\n", " ").strip()
+        next_q = raw.split("?")[0]
+        next_q = next_q.split("Next question:")[-1].strip().capitalize() + "?"
+
+        # --- duplicate filter ---
+        if any(next_q.lower() == q.lower() for q in prev_qs):
+            next_q = "What do you value most when collaborating with others?"
+
         if len(next_q.split()) < 4 or len(next_q) > 140:
             next_q = "What motivates you the most when you start something new?"
+
     except Exception as e:
         next_q = f"(⚠️ Question generation error: {e})"
 
@@ -66,9 +81,9 @@ with gr.Blocks(css="""
     #progress {text-align: center; font-weight: bold; color: #4CAF50; font-size: 18px;}
     textarea {height: 100px !important;}
 """) as demo:
-    gr.Markdown("## 🧠 Adaptive MBTI Classifier\nDynamic conversation with context memory")
+    gr.Markdown("## 🧠 Adaptive MBTI Classifier\n### Context-aware interviewer with memory and unique questions.")
 
-    state = gr.State([])  # stores [(question, answer)] between iterations
+    state = gr.State([])
 
     question = gr.Textbox(
         label="Question",
train.py DELETED
@@ -1,29 +0,0 @@
-from datasets import load_dataset
-from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
-
-MODEL = "distilgpt2"
-data = load_dataset("json", data_files={"train": "data/train.jsonl"})
-tok = AutoTokenizer.from_pretrained(MODEL)
-tok.pad_token = tok.eos_token
-
-def prep(ex):
-    text = f"### Instruction:\n{ex['instruction']}\n### Response:\n{ex['output']}"
-    enc = tok(text, truncation=True, padding="max_length", max_length=256)
-    enc["labels"] = enc["input_ids"].copy()
-    return enc
-
-ds = data["train"].map(prep)
-model = AutoModelForCausalLM.from_pretrained(MODEL)
-
-args = TrainingArguments(
-    output_dir="ft_model",
-    num_train_epochs=2,
-    per_device_train_batch_size=1,
-    logging_steps=10,
-    save_strategy="epoch",
-)
-
-trainer = Trainer(model=model, args=args, train_dataset=ds)
-trainer.train()
-trainer.save_model("ft_model")
-tok.save_pretrained("ft_model")
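
The deleted script saved its checkpoint and tokenizer to ft_model/. For completeness, a minimal sketch of how that checkpoint would be loaded for generation with the standard Transformers API, reusing the Instruction/Response template from the deleted prep function (this snippet is illustrative and not part of the commit):

from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the artifacts train.py used to write to ft_model/.
tok = AutoTokenizer.from_pretrained("ft_model")
model = AutoModelForCausalLM.from_pretrained("ft_model")

prompt = "### Instruction:\nDescribe yourself.\n### Response:\n"
inputs = tok(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=60, pad_token_id=tok.eos_token_id)
print(tok.decode(out[0], skip_special_tokens=True))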