QAway-to committed
Commit 87b7e98 · 1 Parent(s): 3c8cb2b
google/flan-t5-small. Interviewer FIX.
Files changed: core/interviewer.py (+31 -15)
core/interviewer.py CHANGED

@@ -1,22 +1,34 @@
 # core/interviewer.py
+"""
+🇬🇧 Interviewer logic module
+Generates MBTI-category-based questions blindly (without reading user input).
+
+🇷🇺 Модуль интервьюера
+Генерирует вопросы по категориям MBTI, не анализируя ответы пользователя.
+"""
 
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
+# --------------------------------------------------------------
+# 1️⃣ Settings
+# --------------------------------------------------------------
 QG_MODEL = "google/flan-t5-small"
 
 tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
-
+model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
 
-# use the uppercase name to distinguish it from the parameter inside the function
 QG_PIPE = pipeline(
     "text2text-generation",
-    model=
+    model=model,
     tokenizer=tokenizer,
     max_new_tokens=40,
     num_beams=4,
     no_repeat_ngram_size=4,
 )
 
+# --------------------------------------------------------------
+# 2️⃣ Session state
+# --------------------------------------------------------------
 session_state = {
     "history": {},
     "categories": [
@@ -27,14 +39,15 @@ session_state = {
     ],
 }
 
+# --------------------------------------------------------------
+# 3️⃣ Cleaning instruction text out of the output
+# --------------------------------------------------------------
 def _clean(q: str) -> str:
     q = (q or "").strip()
-
-    bad = ["generate", "question", "output", "explain", "instruction", "user said", "based on"]
+    bad = ["generate", "question", "output", "instruction", "explain", "user", "context"]
     lower = q.lower()
     for b in bad:
         if b in lower:
-            # take everything after the matched substring
             idx = lower.find(b) + len(b)
             q = q[idx:].lstrip(":,. ").strip()
             lower = q.lower()
@@ -44,16 +57,20 @@ def _clean(q: str) -> str:
     q = q.rstrip(".") + "?"
     return q
 
-
+
+# --------------------------------------------------------------
+# 4️⃣ Question generation
+# --------------------------------------------------------------
+def generate_question(user_id: str, qg_pipe=None, **kwargs) -> str:
     """
     Returns one new question for the next unused MBTI axis.
-
-    **kwargs are swallowed so we do not crash if the caller passes extra arguments.
+    Does not use the user's answer.
     """
     history = session_state["history"].get(user_id, {"asked": []})
     asked = history["asked"]
     cats = session_state["categories"]
 
+    # all categories have been covered
     if len(asked) >= len(cats):
         return "✅ All MBTI axes covered."
 
@@ -62,18 +79,17 @@ def generate_question(user_id: str, user_answer: str = None, qg_pipe=None, **kwa
     session_state["history"][user_id] = history
 
     prompt = (
-        f"Ask one
-        f"Start with What
-        f"Do not
-        f"
-        f"User context: {user_answer or ''}"
+        f"Ask one natural, open-ended question about {next_cat}. "
+        f"Start with What, Why, How, or When. "
+        f"Do not include any instructions, explanations, or quotes. "
+        f"Output only the question itself."
     )
 
     pipe = qg_pipe or QG_PIPE
     out = pipe(prompt)[0]["generated_text"]
     question = _clean(out)
 
-    #
+    # fallback: the model returned empty or garbage text
     if not question or len(question.split()) < 3:
        question = f"What aspects of {next_cat.lower()} best describe you and why?"
 
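The sketch below shows how the updated generate_question could be called after this fix. It is a hedged illustration, not part of the commit: the import path assumes the repository root is on PYTHONPATH, the user id is a made-up example, and the google/flan-t5-small weights must be downloadable on first run.

# Minimal usage sketch (not part of the commit); assumes the repository root is
# importable and that the google/flan-t5-small weights can be downloaded.
from core.interviewer import generate_question, session_state

user_id = "demo-user"            # hypothetical id; any string key works

q1 = generate_question(user_id)  # question for the next unused MBTI axis
print(q1)

# Extra keyword arguments such as the old user_answer are swallowed by **kwargs,
# so callers written against the previous signature keep working.
q2 = generate_question(user_id, user_answer="I enjoy quiet evenings")
print(q2)

# Per-user progress lives in the module-level session_state dict.
print(session_state["history"][user_id]["asked"])

# Once every category in session_state["categories"] has been asked,
# generate_question returns "✅ All MBTI axes covered."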