Spaces:

sadovsky
/

MBTI

Running

App Files Files Community

QAway-to committed 25 days ago

Commit

8d4e786

1 Parent(s): 995a334

f3nsmart/TinyLlama-MBTI-Interviewer-LoRA. v1.0

Browse files

Files changed (2) hide show

app.py +18 -42
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,78 +1,64 @@
 import gradio as gr
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     AutoModelForSequenceClassification,
     pipeline
 )
 # ===============================================================
 # 1️⃣ Settings and models
 # ===============================================================
 # Builds two pipelines at import time: `mbti_pipe` (text classification)
 # and `llm_pipe` (text generation, used to produce the next question).
-# Fine-tuned MBTI classifier (your model)
 MBTI_MODEL = "f3nsmart/MBTIclassifier"
-mbti_pipe = pipeline("text-classification", model=MBTI_MODEL, return_all_scores=True)
-# Interviewer model
-INTERVIEWER_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
-tokenizer_qwen = AutoTokenizer.from_pretrained(INTERVIEWER_MODEL)
-model_qwen = AutoModelForCausalLM.from_pretrained(
-    INTERVIEWER_MODEL,
-    torch_dtype="auto",
     device_map="auto"
 )
 # Generation settings applied to every llm_pipe call.
 # NOTE(review): temperature/top_p are sampling settings — confirm that
 # sampling is actually enabled for this model's generation config.
 llm_pipe = pipeline(
     "text-generation",
-    model=model_qwen,
-    tokenizer=tokenizer_qwen,
     max_new_tokens=70,
     temperature=0.7,
     top_p=0.9,
 )
 # ===============================================================
 # 2️⃣ Helper functions
 # ===============================================================
 def clean_question(text: str) -> str:
     """Strip instruction residue from LLM output and return one clean question.

     Only the first line of ``text`` is kept and surrounding quotes are
     removed. Everything up to and including the last role/instruction
     marker (``user:``, ``assistant:``, ``instruction``, ``interviewer``,
     ``system:``) is dropped, matched case-insensitively. A ``?`` is appended
     when the text contains none, and a fixed fallback question is returned
     when fewer than three words remain.

     Args:
         text: Raw text produced by the generation pipeline.

     Returns:
         The cleaned question, or the fallback question.
     """
     import re  # local import so this block does not rely on the file's import list

     text = text.strip()
     # Keep only the first line in case the LLM produced several.
     text = text.split("\n")[0]
     # The model sometimes wraps the question in quotes; remove them.
     text = text.strip('"').strip("'")
     # The model may echo "User:" / "Assistant:" / "Instruction:" and similar.
     bad_tokens = ["user:", "assistant:", "instruction", "interviewer", "system:"]
     for bad in bad_tokens:
         # Split case-insensitively. Detection used .lower() while the split
         # used the lowercase literal, so "Assistant: ..." was found but
         # never removed.
         chunks = re.split(re.escape(bad), text, flags=re.IGNORECASE)
         if len(chunks) > 1:
             # Keep the text after the last marker, then drop the ":" left by
             # colon-less markers and any quotes around the question itself.
             text = chunks[-1].lstrip(" :").strip().strip('"').strip("'")
     # Append a question mark when the text has none.
     if "?" not in text:
         text = text.rstrip(".") + "?"
     # Guard against junk: too few words means the output is unusable.
     if len(text.split()) < 3:
         return "What do you usually enjoy doing in your free time?"
     return text.strip()
 def generate_first_question():
     """Return the fixed opening question; nothing is generated by a model."""
     opening_question = "What do you usually enjoy doing in your free time?"
     return opening_question
 def analyze_and_ask(user_text, prev_count):
     if not user_text.strip():
         return "⚠️ Введите ответ.", "", prev_count
     try:
         n = int(prev_count.split("/")[0]) + 1
     except Exception:
@@ -83,7 +69,6 @@ def analyze_and_ask(user_text, prev_count):
     res_sorted = sorted(res, key=lambda x: x["score"], reverse=True)
     mbti_text = "\n".join([f"{r['label']} → {r['score']:.3f}" for r in res_sorted[:3]])
-    # Новый, уточнённый промпт
     prompt = (
         f"User said: '{user_text}'. "
         "Generate one natural, open-ended question that starts with 'What', 'Why', 'How', or 'When'. "
@@ -94,25 +79,18 @@ def analyze_and_ask(user_text, prev_count):
     raw = llm_pipe(prompt)[0]["generated_text"]
     cleaned = clean_question(raw)
-    # Если вопрос не начинается с нужного слова — создаём fallback
-    valid_starts = ("What", "Why", "How", "When")
-    if not cleaned.startswith(valid_starts):
         cleaned = "What motivates you to do the things you enjoy most?"
     return mbti_text, cleaned, counter
 # ===============================================================
 # 3️⃣ Gradio interface
 # ===============================================================
 with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
     gr.Markdown(
         "## 🧠 MBTI Personality Interviewer\n"
         "Определи личностный тип и получи следующий вопрос от интервьюера."
     )
     with gr.Row():
         with gr.Column(scale=1):
             inp = gr.Textbox(
@@ -127,8 +105,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as
             progress = gr.Textbox(label="⏳ Прогресс", value="0/30")
     btn.click(analyze_and_ask, inputs=[inp, progress], outputs=[mbti_out, interviewer_out, progress])
-    # Автоматическая загрузка первого вопроса
     demo.load(lambda: ("", generate_first_question(), "0/30"), inputs=None, outputs=[mbti_out, interviewer_out, progress])
 demo.launch()

 import gradio as gr
+import torch
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     AutoModelForSequenceClassification,
     pipeline
 )
+from peft import PeftModel  # 👈 важно для LoRA адаптации
 # ===============================================================
 # 1️⃣ Settings and models
 # ===============================================================
 # Builds two pipelines at import time: `mbti_pipe` (text classification)
 # and `llm_pipe` (text generation from a base model wrapped with a LoRA
 # adapter).
 MBTI_MODEL = "f3nsmart/MBTIclassifier"
+INTERVIEWER_BASE = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+INTERVIEWER_LORA = "f3nsmart/TinyLlama-MBTI-Interviewer-LoRA"
+# --- MBTI classifier ---
+mbti_pipe = pipeline("text-classification", model=MBTI_MODEL, return_all_scores=True)
+# --- TinyLlama interviewer + LoRA ---
+print("🔄 Загрузка TinyLlama с адаптером LoRA...")
 # The tokenizer is loaded from the adapter repo, not from the base model repo.
+tokenizer_llama = AutoTokenizer.from_pretrained(INTERVIEWER_LORA)
 # Half precision is used only when CUDA is available; otherwise float32.
+base_model = AutoModelForCausalLM.from_pretrained(
+    INTERVIEWER_BASE,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     device_map="auto"
 )
 # Wrap the base model with the adapter weights from INTERVIEWER_LORA.
+model_llora = PeftModel.from_pretrained(base_model, INTERVIEWER_LORA)
 # NOTE(review): temperature/top_p are sampling settings — confirm that
 # sampling is actually enabled for this model's generation config.
 # NOTE(review): device_map is passed here although the model object is
 # already loaded with device_map="auto" above — confirm it is needed.
 llm_pipe = pipeline(
     "text-generation",
+    model=model_llora,
+    tokenizer=tokenizer_llama,
     max_new_tokens=70,
     temperature=0.7,
     top_p=0.9,
+    device_map="auto"
 )
 # ===============================================================
 # 2️⃣ Helper functions
 # ===============================================================
 def clean_question(text: str) -> str:
     """Reduce raw LLM output to a single interview question.

     Only the first line of ``text`` is kept and surrounding quotes are
     removed. Everything up to and including the last role/instruction
     marker (``user:``, ``assistant:``, ``instruction``, ``interviewer``,
     ``system:``) is discarded, matched case-insensitively. A ``?`` is
     appended when the text contains none, and a fixed fallback question is
     returned when fewer than three words remain.

     Args:
         text: Raw text produced by the generation pipeline.

     Returns:
         The cleaned question, or the fallback question.
     """
     import re  # local import so this block does not rely on the file's import list

     question = text.strip().split("\n")[0].strip('"').strip("'")
     bad_tokens = ["user:", "assistant:", "instruction", "interviewer", "system:"]
     for bad in bad_tokens:
         # Split case-insensitively. Detection used .lower() while the split
         # used the lowercase literal, so "Assistant: ..." was found but
         # never removed.
         pieces = re.split(re.escape(bad), question, flags=re.IGNORECASE)
         if len(pieces) > 1:
             # Keep the text after the last marker, then drop the ":" left by
             # colon-less markers and any quotes around the question itself.
             question = pieces[-1].lstrip(" :").strip().strip('"').strip("'")
     if "?" not in question:
         question = question.rstrip(".") + "?"
     # Too few words means the output is unusable; fall back to a stock question.
     if len(question.split()) < 3:
         return "What do you usually enjoy doing in your free time?"
     return question.strip()
 def generate_first_question():
     """Return the fixed opening interview question; no model call is made."""
     first_question = "What do you usually enjoy doing in your free time?"
     return first_question
 def analyze_and_ask(user_text, prev_count):
     if not user_text.strip():
         return "⚠️ Введите ответ.", "", prev_count
     try:
         n = int(prev_count.split("/")[0]) + 1
     except Exception:
     res_sorted = sorted(res, key=lambda x: x["score"], reverse=True)
     mbti_text = "\n".join([f"{r['label']} → {r['score']:.3f}" for r in res_sorted[:3]])
     prompt = (
         f"User said: '{user_text}'. "
         "Generate one natural, open-ended question that starts with 'What', 'Why', 'How', or 'When'. "
     raw = llm_pipe(prompt)[0]["generated_text"]
     cleaned = clean_question(raw)
+    if not cleaned.startswith(("What", "Why", "How", "When")):
         cleaned = "What motivates you to do the things you enjoy most?"
     return mbti_text, cleaned, counter
 # ===============================================================
 # 3️⃣ Gradio interface
 # ===============================================================
 with gr.Blocks(theme=gr.themes.Soft(), title="MBTI Personality Interviewer") as demo:
     gr.Markdown(
         "## 🧠 MBTI Personality Interviewer\n"
         "Определи личностный тип и получи следующий вопрос от интервьюера."
     )
     with gr.Row():
         with gr.Column(scale=1):
             inp = gr.Textbox(
             progress = gr.Textbox(label="⏳ Прогресс", value="0/30")
     btn.click(analyze_and_ask, inputs=[inp, progress], outputs=[mbti_out, interviewer_out, progress])
     demo.load(lambda: ("", generate_first_question(), "0/30"), inputs=None, outputs=[mbti_out, interviewer_out, progress])
 demo.launch()

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ datasets
 torch
 gradio
 openai
-accelerate

 torch
 gradio
 openai
+accelerate
+peft