Spaces:

TTS-AGI
/

Voice-Clone-Arena

Running

App Files Community

kemuriririn committed on Jul 2

Commit

2249567

1 Parent(s): a2a66cd

Modify Elo calculation

Browse files

Files changed (2) hide show

app.py +105 -56
models.py +68 -30

app.py CHANGED Viewed

@@ -662,62 +662,111 @@ def generate_tts():
         )
     # --- End Cache Check ---
-    # --- Cache Miss: Generate on the fly ---
-    app.logger.warning(f"TTS Cache MISS for: '{text[:50]}...'. Generating on the fly.")
-    available_models = Model.query.filter_by(
-        model_type=ModelType.TTS, is_active=True
-    ).all()
-    if len(available_models) < 2:
-        return jsonify({"error": "Not enough TTS models available"}), 500
-    # 新增：a和b模型都需通过缓存和静音检测
-    candidate_models = available_models.copy()
-    random.shuffle(candidate_models)
-    valid_pairs = []
-    # 枚举所有模型对，找到第一个都通过的组合
-    for i in range(len(candidate_models)):
-        model_a = candidate_models[i]
-        audio_a_path = find_cached_audio(str(model_a.id), text, reference_audio_path)
-        app.logger.warning(f"checking {audio_a_path}")
-        if not audio_a_path or has_long_silence(audio_a_path):
-            continue
-        # 检测到a模型音频有效，继续检测b模型
-        for j in range(i + 1, len(candidate_models)):
-            model_b = candidate_models[j]
-            audio_b_path = find_cached_audio(str(model_b.id), text, reference_audio_path)
-            app.logger.warning(f"checking {audio_b_path}")
-            if not audio_b_path or has_long_silence(audio_b_path):
-                continue
-            valid_pairs.append((model_a, audio_a_path, model_b, audio_b_path))
-            app.logger.warning(f"Found valid model pair: {model_a.name} and {model_b.name} for text '{text[:50]}...'")
-            break
-    if not valid_pairs:
-        return jsonify({"error": "所有模型均未通过持久化缓存和静音检测，无法生成音频"}), 500
-    # 随机选一个合格组合
-    model_a, audio_a_path, model_b, audio_b_path = random.choice(valid_pairs)
-    session_id = str(uuid.uuid4())
-    app.tts_sessions[session_id] = {
-        "model_a": model_a.id,
-        "model_b": model_b.id,
-        "audio_a": audio_a_path,
-        "audio_b": audio_b_path,
-        "text": text,
-        "created_at": datetime.utcnow(),
-        "expires_at": datetime.utcnow() + timedelta(minutes=30),
-        "voted": False,
-    }
-    # 清理临时参考音频文件
-    if reference_audio_path and os.path.exists(reference_audio_path):
-        os.remove(reference_audio_path)
-    return jsonify({
-        "session_id": session_id,
-        "audio_a": f"/api/tts/audio/{session_id}/a",
-        "audio_b": f"/api/tts/audio/{session_id}/b",
-        "expires_in": 1800,
-        "cache_hit": True,
-    })
-    # --- End Cache Miss ---
 @app.route("/api/tts/audio/<session_id>/<model_key>")

         )
     # --- End Cache Check ---
+    # --- Cache Miss: Local File Cache ---
+    # 对于预置文本和预置prompt，检查本地缓存
+    if text in predefined_texts and prompt_md5 in predefined_prompts.values():
+        app.logger.warning(f"TTS Cache MISS for: '{text[:50]}...'. Generating on the fly.")
+        available_models = Model.query.filter_by(
+            model_type=ModelType.TTS, is_active=True
+        ).all()
+        if len(available_models) < 2:
+            return jsonify({"error": "Not enough TTS models available"}), 500
+        # 新增：a和b模型都需通过缓存检测
+        candidate_models = available_models.copy()
+        valid_models = []
+        invalid_models = []
+        for model in candidate_models:
+            audio_path = find_cached_audio(model.name, text, prompt_audio_path=reference_audio_path)
+            if audio_path and os.path.exists(audio_path):
+                valid_models.append(model)
+            else:
+                invalid_models.append(model)
+        if len(valid_models) < 2:
+            return jsonify({"error": "Not enough valid TTS model results available"}), 500
+        apply_filter_penalty_and_redistribute(invalid_models, valid_models, penalty_amount=1.0)
+        # 从有结果的模型中随机选择两个
+        model_a,model_b = random.sample(valid_models, 2)
+        audio_a_path = find_cached_audio(model_a.name, text, prompt_audio_path=reference_audio_path)
+        audio_b_path = find_cached_audio(model_b.name, text, prompt_audio_path=reference_audio_path)
+        session_id = str(uuid.uuid4())
+        app.tts_sessions[session_id] = {
+            "model_a": model_a.id,
+            "model_b": model_b.id,
+            "audio_a": audio_a_path,
+            "audio_b": audio_b_path,
+            "text": text,
+            "created_at": datetime.utcnow(),
+            "expires_at": datetime.utcnow() + timedelta(minutes=30),
+            "voted": False,
+        }
+        # 清理临时参考音频文件
+        if reference_audio_path and os.path.exists(reference_audio_path):
+            os.remove(reference_audio_path)
+        return jsonify({
+            "session_id": session_id,
+            "audio_a": f"/api/tts/audio/{session_id}/a",
+            "audio_b": f"/api/tts/audio/{session_id}/b",
+            "expires_in": 1800,
+            "cache_hit": True,
+        })
+        # --- End Cache Miss ---
+    else:
+        app.logger.warning(f"TTS Cache MISS for: '{text[:50]}...'. Generating on the fly.")
+        available_models = Model.query.filter_by(
+            model_type=ModelType.TTS, is_active=True
+        ).all()
+        if len(available_models) < 2:
+            return jsonify({"error": "Not enough TTS models available"}), 500
+        # Get two random models with weighted selection
+        models = get_weighted_random_models(available_models, 2, ModelType.TTS)
+        # Generate audio concurrently using a local executor for clarity within the request
+        with ThreadPoolExecutor(max_workers=2, thread_name_prefix='AudioGen') as audio_executor:
+            future_a = audio_executor.submit(generate_and_save_tts, text, models[0].id, RUNTIME_CACHE_DIR,
+                                             prompt_audio_path=reference_audio_path)
+            future_b = audio_executor.submit(generate_and_save_tts, text, models[1].id, RUNTIME_CACHE_DIR,
+                                             prompt_audio_path=reference_audio_path)
+            timeout_seconds = 120
+            audio_a_path, ref_a = future_a.result(timeout=timeout_seconds)
+            audio_b_path, ref_b = future_b.result(timeout=timeout_seconds)
+        if not audio_a_path or not audio_b_path:
+            return jsonify({"error": "Failed to generate TTS audio"}), 500
+        session_id = str(uuid.uuid4())
+        app.tts_sessions[session_id] = {
+            "model_a": models[0].id,
+            "model_b": models[1].id,
+            "audio_a": audio_a_path,
+            "audio_b": audio_b_path,
+            "text": text,
+            "created_at": datetime.utcnow(),
+            "expires_at": datetime.utcnow() + timedelta(minutes=30),
+            "voted": False,
+        }
+        # Clean up temporary reference audio file if it was provided
+        if reference_audio_path and os.path.exists(reference_audio_path):
+            os.remove(reference_audio_path)
+        # Return response with session ID and audio URLs
+        return jsonify(
+            {
+                "session_id": session_id,
+                "audio_a": f"/api/tts/audio/{session_id}/a",
+                "audio_b": f"/api/tts/audio/{session_id}/b",
+                "expires_in": 1800,  # 30 minutes in seconds
+                "cache_hit": False,
+            }
+        )
 @app.route("/api/tts/audio/<session_id>/<model_key>")

models.py CHANGED Viewed

@@ -84,6 +84,7 @@ class EloHistory(db.Model):
     model_id = db.Column(db.String(100), db.ForeignKey("model.id"), nullable=False)
     timestamp = db.Column(db.DateTime, default=datetime.utcnow)
     elo_score = db.Column(db.Float, nullable=False)
     vote_id = db.Column(db.Integer, db.ForeignKey("vote.id"), nullable=True)
     model_type = db.Column(db.String(20), nullable=False)  # 'tts' or 'conversational'
@@ -130,18 +131,18 @@ def record_vote(user_id, text, chosen_model_id, rejected_model_id, model_type):
         db.session.rollback()
         return None, "One or both models not found for the specified model type"
-    k_factor_winner = get_dynamic_k_factor(chosen_model.match_count)
-    k_factor_loser = get_dynamic_k_factor(rejected_model.match_count)
-    # Calculate new Elo ratings
-    new_chosen_elo, new_rejected_elo = calculate_elo_change_dynamic_k(
-        chosen_model.current_elo, rejected_model.current_elo, k_factor_winner, k_factor_loser
     )
-    # new_chosen_elo, new_rejected_elo = calculate_elo_change(
-    #     chosen_model.current_elo, rejected_model.current_elo
-    # )
     # Update model stats
     chosen_model.current_elo = new_chosen_elo
     chosen_model.win_count += 1
@@ -535,32 +536,69 @@ def toggle_user_leaderboard_visibility(user_id):
     return user.show_in_leaderboard
-def get_dynamic_k_factor(match_count):
-    """
-    使用连续衰减函数动态计算K因子。
-    K因子会从一个最大值平滑地衰减到一个最小值。
-    Args:
-        match_count (int): 模型的总比赛次数。
-    Returns:
-        float: 计算出的K因子。
-    """
     k_max = 40  # 新模型的最大K因子
     k_min = 10  # 成熟模型的最小K因子
-    decay_speed = 50.0  # 衰减速度，数值越大，K因子下降越慢
-    # 指数衰减公式: K = K_min + (K_max - K_min) * e^(-match_count / decay_speed)
-    k_factor = k_min + (k_max - k_min) * math.exp(-match_count / decay_speed)
     return k_factor
-def calculate_elo_change_dynamic_k(winner_elo, loser_elo, k_factor_winner, k_factor_loser):
-    """根据双方不同的K因子计算Elo等级分变化。"""
-    expected_winner = 1 / (1 + math.pow(10, (loser_elo - winner_elo) / 400))
-    expected_loser = 1 / (1 + math.pow(10, (winner_elo - loser_elo) / 400))
-    winner_new_elo = winner_elo + k_factor_winner * (1 - expected_winner)
-    loser_new_elo = loser_elo + k_factor_loser * (0 - expected_loser)
-    return winner_new_elo, loser_new_elo

     model_id = db.Column(db.String(100), db.ForeignKey("model.id"), nullable=False)
     timestamp = db.Column(db.DateTime, default=datetime.utcnow)
     elo_score = db.Column(db.Float, nullable=False)
+    by_system = db.Column(db.Boolean, default=False)  # Whether this is a penalty or reward change
     vote_id = db.Column(db.Integer, db.ForeignKey("vote.id"), nullable=True)
     model_type = db.Column(db.String(20), nullable=False)  # 'tts' or 'conversational'
         db.session.rollback()
         return None, "One or both models not found for the specified model type"
+    # --- ELO 计算逻辑与 test_elo.py 保持一致 ---
+    # a. 计算双方的基础动态K因子
+    max_match = max(chosen_model.match_count, rejected_model.match_count, 10)
+    k_winner_base = get_dynamic_k_factor(chosen_model.match_count, max_match)
+    k_loser_base = get_dynamic_k_factor(rejected_model.match_count, max_match)
+    # b. 取平均K因子
+    base_k = (k_winner_base + k_loser_base) / 2.0
+    new_chosen_elo, new_rejected_elo = calculate_elo_change(
+        chosen_model.current_elo, rejected_model.current_elo, k_factor=base_k
     )
     # Update model stats
     chosen_model.current_elo = new_chosen_elo
     chosen_model.win_count += 1
     return user.show_in_leaderboard
+def get_dynamic_k_factor(match_count, max_match_count):
     k_max = 40  # 新模型的最大K因子
     k_min = 10  # 成熟模型的最小K因子
+    decay_factor = 5.0  # 衰减因子，控制K因子下降的速度
+    # 防止除以零
+    if max_match_count == 0:
+        return k_max
+    # 计算相对比赛进度 (0到1之间)
+    relative_progress = match_count / max_match_count
+    # 使用指数衰减公式，但基于相对进度
+    # K = K_min + (K_max - K_min) * e^(-decay_factor * relative_progress)
+    k_factor = k_min + (k_max - k_min) * math.exp(-decay_factor * relative_progress)
     return k_factor
+def apply_filter_penalty_and_redistribute(unavailable_models, available_models, penalty_amount=1.0):
+    """
+    对不可用的模型施加惩罚，并将扣除的分数平均重新分配给可用的模型。
+    这确保了系统的ELO总分保持不变（零和）。
+    Args:
+        unavailable_models (list[Model]): 因被过滤而不可用的模型对象列表。
+        available_models (list[Model]): 当前可用的模型对象列表。
+        penalty_amount (float): 每个不可用模型被扣除的ELO分数。
+    """
+    if not unavailable_models or not available_models:
+        # 如果没有不可用模型或没有可用的模型来接收分数，则不执行任何操作
+        return
+    # 1. 计算总惩罚分数
+    total_penalty = len(unavailable_models) * penalty_amount
+    reward_per_model = total_penalty / len(available_models)
+    # 2. 从不可用模型中扣除分数并记录历史
+    for model in unavailable_models:
+        new_elo = model.current_elo - penalty_amount
+        model.current_elo = new_elo
+        # 为惩罚创建一条历史记录 (没有 vote_id)
+        penalty_history = EloHistory(
+            model_id=model.id,
+            elo_score=new_elo,
+            vote_id=None,
+            by_system=True,
+            model_type=model.model_type,
+        )
+        db.session.add(penalty_history)
+    # 3. 将分数奖励给可用模型并记录历史
+    for model in available_models:
+        new_elo = model.current_elo + reward_per_model
+        model.current_elo = new_elo
+        # 为奖励创建一条历史记录 (没有 vote_id)
+        reward_history = EloHistory(
+            model_id=model.id,
+            elo_score=new_elo,
+            vote_id=None,
+            by_system=True,
+            model_type=model.model_type,
+        )
+        db.session.add(reward_history)
+    # 4. 提交所有更改到数据库
+    db.session.commit()