Spaces:

sadovsky
/

MBTI

Running

App Files Community

QAway-to committed 25 days ago

Commit

1c31761

1 Parent(s): 87cd86c

Change tokenizer v1.4

Browse files

Files changed (1) hide show

core/interviewer.py +15 -10

core/interviewer.py CHANGED Viewed

@@ -1,25 +1,33 @@
 # core/interviewer.py
 """
 🇬🇧 Interviewer logic module (no instructions)
-Generates random MBTI-style questions using a fine-tuned model.
 🇷🇺 Модуль интервьюера.
-Использует fine-tuned модель для генерации вопросов без промптов и инструкций.
 """
 from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
 QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
 tokenizer = T5Tokenizer.from_pretrained(QG_MODEL, use_fast=False)
 model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device).eval()
 print(f"✅ Loaded interviewer model (slow tokenizer): {QG_MODEL}")
 # --------------------------------------------------------------
-# 2️⃣ Базовые seed-промпты (без инструкций)
 # --------------------------------------------------------------
 PROMPTS = [
     "Personality and emotions.",
@@ -33,7 +41,7 @@ PROMPTS = [
 # 3️⃣ Очистка текста
 # --------------------------------------------------------------
 def _clean_question(text: str) -> str:
-    """Берёт первую фразу с '?', обрезает лишнее"""
     text = text.strip()
     m = re.search(r"(.+?\?)", text)
     if m:
@@ -50,9 +58,7 @@ def _clean_question(text: str) -> str:
 # 4️⃣ Генерация вопроса
 # --------------------------------------------------------------
 def generate_question(user_id: str = "default_user", **kwargs) -> str:
-    """
-    Генерирует один MBTI-вопрос без инструкций.
-    """
     prompt = random.choice(PROMPTS)
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
     with torch.no_grad():
@@ -65,5 +71,4 @@ def generate_question(user_id: str = "default_user", **kwargs) -> str:
             max_new_tokens=60,
         )
     text = tokenizer.decode(out[0], skip_special_tokens=True)
-    question = _clean_question(text)
-    return question

 # core/interviewer.py
 """
 🇬🇧 Interviewer logic module (no instructions)
+Generates random MBTI-style questions using the fine-tuned model.
 🇷🇺 Модуль интервьюера.
+Использует fine-tuned модель для генерации вопросов без инструкций.
 """
+import random
+import re
+import torch
 from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
+# --------------------------------------------------------------
+# 1️⃣ Настройки модели
+# --------------------------------------------------------------
 QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
+# ✅ Принудительно используем оригинальный SentencePiece-токенайзер
 tokenizer = T5Tokenizer.from_pretrained(QG_MODEL, use_fast=False)
 model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device).eval()
 print(f"✅ Loaded interviewer model (slow tokenizer): {QG_MODEL}")
+print(f"Device set to use {device}")
 # --------------------------------------------------------------
+# 2️⃣ Seed-промпты (без инструкций)
 # --------------------------------------------------------------
 PROMPTS = [
     "Personality and emotions.",
 # 3️⃣ Очистка текста
 # --------------------------------------------------------------
 def _clean_question(text: str) -> str:
+    """Берёт первую фразу с '?'"""
     text = text.strip()
     m = re.search(r"(.+?\?)", text)
     if m:
 # 4️⃣ Генерация вопроса
 # --------------------------------------------------------------
 def generate_question(user_id: str = "default_user", **kwargs) -> str:
+    """Генерирует один MBTI-вопрос без инструкций"""
     prompt = random.choice(PROMPTS)
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
     with torch.no_grad():
             max_new_tokens=60,
         )
     text = tokenizer.decode(out[0], skip_special_tokens=True)
+    return _clean_question(text)