aiqtech commited on
Commit
5576ce9
ยท
verified ยท
1 Parent(s): eddae4d

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +689 -330
app-backup.py CHANGED
@@ -1,6 +1,8 @@
1
  """
2
  โšก Speed-Optimized Multi-Agent RAG System for Complex Questions
3
- ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ, ์Šค๋งˆํŠธ ์บ์‹ฑ, ๋™์  ํŒŒ์ดํ”„๋ผ์ธ์œผ๋กœ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ ๋น ๋ฅด๊ฒŒ ์ฒ˜๋ฆฌ
 
 
4
  """
5
 
6
  import os
@@ -8,6 +10,8 @@ import json
8
  import time
9
  import asyncio
10
  import hashlib
 
 
11
  from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
12
  from datetime import datetime, timedelta
13
  from enum import Enum
@@ -59,105 +63,60 @@ class AgentResponse(BaseModel):
59
 
60
 
61
  # ============================================================================
62
- # ์Šค๋งˆํŠธ ์บ์‹ฑ ์‹œ์Šคํ…œ
63
  # ============================================================================
64
 
65
- class SmartCache:
66
- """์ง€๋Šฅํ˜• ์บ์‹ฑ ์‹œ์Šคํ…œ"""
67
 
68
- def __init__(self, max_size: int = 100, ttl_hours: int = 24):
69
- self.cache = {}
70
- self.access_count = {}
71
- self.timestamps = {}
72
- self.max_size = max_size
73
- self.ttl = timedelta(hours=ttl_hours)
74
- self.reasoning_patterns = self._init_reasoning_patterns()
75
-
76
- def _init_reasoning_patterns(self) -> Dict:
77
- """์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ์ถ”๋ก  ํŒจํ„ด ์ดˆ๊ธฐํ™”"""
78
- return {
79
- "analysis": {
80
- "structure": ["ํ˜„ํ™ฉ ๋ถ„์„", "ํ•ต์‹ฌ ์š”์ธ", "์˜ํ–ฅ ํ‰๊ฐ€", "์ „๋žต ์ œ์•ˆ"],
81
- "keywords": ["๋ถ„์„", "ํ‰๊ฐ€", "์˜ํ–ฅ", "์ „๋žต"]
82
- },
83
- "comparison": {
84
- "structure": ["๋Œ€์ƒ ์ •์˜", "๋น„๊ต ๊ธฐ์ค€", "์žฅ๋‹จ์  ๋ถ„์„", "๊ฒฐ๋ก "],
85
- "keywords": ["๋น„๊ต", "์ฐจ์ด", "์žฅ๋‹จ์ ", "vs"]
86
- },
87
- "creative": {
88
- "structure": ["๋ฌธ์ œ ์ •์˜", "์ฐฝ์˜์  ์ ‘๊ทผ", "๊ตฌํ˜„ ๋ฐฉ๋ฒ•", "์˜ˆ์ƒ ํšจ๊ณผ"],
89
- "keywords": ["์ฐฝ์˜์ ", "ํ˜์‹ ์ ", "์ƒˆ๋กœ์šด", "์•„์ด๋””์–ด"]
90
- },
91
- "technical": {
92
- "structure": ["๊ธฐ์ˆ  ๊ฐœ์š”", "ํ•ต์‹ฌ ์›๋ฆฌ", "๊ตฌํ˜„ ์ƒ์„ธ", "์‹ค์šฉ ์˜ˆ์‹œ"],
93
- "keywords": ["๊ธฐ์ˆ ", "๊ตฌํ˜„", "์ฝ”๋“œ", "์‹œ์Šคํ…œ"]
94
- }
95
- }
96
-
97
- def get_query_hash(self, query: str) -> str:
98
- """์ฟผ๋ฆฌ ํ•ด์‹œ ์ƒ์„ฑ"""
99
- return hashlib.md5(query.encode()).hexdigest()
100
-
101
- def get(self, query: str) -> Optional[Dict]:
102
- """์บ์‹œ์—์„œ ์กฐํšŒ"""
103
- query_hash = self.get_query_hash(query)
104
-
105
- if query_hash in self.cache:
106
- # TTL ์ฒดํฌ
107
- if datetime.now() - self.timestamps[query_hash] < self.ttl:
108
- self.access_count[query_hash] += 1
109
- return self.cache[query_hash]
110
- else:
111
- # ๋งŒ๋ฃŒ๋œ ์บ์‹œ ์‚ญ์ œ
112
- del self.cache[query_hash]
113
- del self.timestamps[query_hash]
114
- del self.access_count[query_hash]
115
-
116
- return None
117
-
118
- def set(self, query: str, response: Dict):
119
- """์บ์‹œ์— ์ €์žฅ"""
120
- query_hash = self.get_query_hash(query)
121
-
122
- # ์บ์‹œ ํฌ๊ธฐ ๊ด€๋ฆฌ
123
- if len(self.cache) >= self.max_size:
124
- # LRU ์ •์ฑ…: ๊ฐ€์žฅ ์ ๊ฒŒ ์‚ฌ์šฉ๋œ ํ•ญ๋ชฉ ์ œ๊ฑฐ
125
- least_used = min(self.access_count, key=self.access_count.get)
126
- del self.cache[least_used]
127
- del self.timestamps[least_used]
128
- del self.access_count[least_used]
129
-
130
- self.cache[query_hash] = response
131
- self.timestamps[query_hash] = datetime.now()
132
- self.access_count[query_hash] = 1
133
-
134
- def get_reasoning_pattern(self, query: str) -> Optional[Dict]:
135
- """์ฟผ๋ฆฌ์— ์ ํ•ฉํ•œ ์ถ”๋ก  ํŒจํ„ด ๋ฐ˜ํ™˜"""
136
- query_lower = query.lower()
137
 
138
- for pattern_type, pattern_data in self.reasoning_patterns.items():
139
- if any(keyword in query_lower for keyword in pattern_data["keywords"]):
140
- return {
141
- "type": pattern_type,
142
- "structure": pattern_data["structure"]
143
- }
 
 
 
 
 
144
 
145
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  # ============================================================================
149
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์ตœ์ ํ™” Brave Search
150
  # ============================================================================
151
 
152
  class AsyncBraveSearch:
153
- """๋น„๋™๊ธฐ Brave ๊ฒ€์ƒ‰ ํด๋ผ์ด์–ธํŠธ"""
154
 
155
  def __init__(self, api_key: Optional[str] = None):
156
  self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY")
157
  self.base_url = "https://api.search.brave.com/res/v1/web/search"
 
158
 
159
- async def search_async(self, query: str, count: int = 5) -> List[Dict]:
160
- """๋น„๋™๊ธฐ ๊ฒ€์ƒ‰"""
161
  if not self.api_key:
162
  return []
163
 
@@ -166,48 +125,70 @@ class AsyncBraveSearch:
166
  "X-Subscription-Token": self.api_key
167
  }
168
 
 
 
 
 
 
 
 
 
169
  params = {
170
  "q": query,
171
  "count": count,
172
  "text_decorations": False,
173
- "search_lang": "ko",
174
- "country": "KR"
175
  }
176
 
177
- try:
178
- async with aiohttp.ClientSession() as session:
179
- async with session.get(
180
- self.base_url,
181
- headers=headers,
182
- params=params,
183
- timeout=aiohttp.ClientTimeout(total=5)
184
- ) as response:
185
- if response.status == 200:
186
- data = await response.json()
187
-
188
- results = []
189
- if "web" in data and "results" in data["web"]:
190
- for item in data["web"]["results"][:count]:
191
- results.append({
192
- "title": item.get("title", ""),
193
- "url": item.get("url", ""),
194
- "description": item.get("description", ""),
195
- "age": item.get("age", "")
196
- })
197
-
198
- return results
199
- except:
200
- return []
 
 
 
 
 
 
 
 
201
 
202
  return []
 
 
 
 
 
 
 
203
 
204
 
205
  # ============================================================================
206
- # ์ตœ์ ํ™”๋œ Fireworks ํด๋ผ์ด์–ธํŠธ
207
  # ============================================================================
208
 
209
  class OptimizedFireworksClient:
210
- """์ตœ์ ํ™”๋œ LLM ํด๋ผ์ด์–ธํŠธ"""
211
 
212
  def __init__(self, api_key: Optional[str] = None):
213
  self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
@@ -223,13 +204,32 @@ class OptimizedFireworksClient:
223
 
224
  # ํ•ญ์ƒ ์ตœ๊ณ  ์„ฑ๋Šฅ ๋ชจ๋ธ ์‚ฌ์šฉ (๋ณต์žกํ•œ ์งˆ๋ฌธ ์ „์ œ)
225
  self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  async def chat_stream_async(
228
  self,
229
  messages: List[Dict],
230
  **kwargs
231
  ) -> AsyncGenerator[str, None]:
232
- """๋น„๋™๊ธฐ ์ŠคํŠธ๋ฆฌ๋ฐ ๋Œ€ํ™”"""
233
 
234
  payload = {
235
  "model": self.model,
@@ -241,34 +241,43 @@ class OptimizedFireworksClient:
241
  "stream": True
242
  }
243
 
244
- try:
245
- async with aiohttp.ClientSession() as session:
246
- async with session.post(
247
- self.base_url,
248
- headers={**self.headers, "Accept": "text/event-stream"},
249
- json=payload,
250
- timeout=aiohttp.ClientTimeout(total=30)
251
- ) as response:
252
- async for line in response.content:
253
- line_str = line.decode('utf-8').strip()
254
- if line_str.startswith("data: "):
255
- data_str = line_str[6:]
256
- if data_str == "[DONE]":
257
- break
258
- try:
259
- data = json.loads(data_str)
260
- if "choices" in data and len(data["choices"]) > 0:
261
- delta = data["choices"][0].get("delta", {})
262
- if "content" in delta:
263
- yield delta["content"]
264
- except json.JSONDecodeError:
265
- continue
266
- except Exception as e:
267
- yield f"์˜ค๋ฅ˜: {str(e)}"
 
 
 
 
 
 
 
 
 
268
 
269
 
270
  # ============================================================================
271
- # ๊ฒฝ๋Ÿ‰ํ™”๋œ ์ถ”๋ก  ์ฒด์ธ
272
  # ============================================================================
273
 
274
  class LightweightReasoningChain:
@@ -276,28 +285,70 @@ class LightweightReasoningChain:
276
 
277
  def __init__(self):
278
  self.templates = {
279
- "problem_solving": {
280
- "steps": ["๋ฌธ์ œ ๋ถ„ํ•ด", "ํ•ต์‹ฌ ์š”์ธ", "ํ•ด๊ฒฐ ๋ฐฉ์•ˆ", "๊ตฌํ˜„ ์ „๋žต"],
281
- "prompt": "์ฒด๊ณ„์ ์œผ๋กœ ๋‹จ๊ณ„๋ณ„๋กœ ๋ถ„์„ํ•˜๊ณ  ํ•ด๊ฒฐ์ฑ…์„ ์ œ์‹œํ•˜์„ธ์š”."
282
- },
283
- "creative_thinking": {
284
- "steps": ["๊ธฐ์กด ์ ‘๊ทผ", "์ฐฝ์˜์  ๋Œ€์•ˆ", "ํ˜์‹  ํฌ์ธํŠธ", "์‹คํ–‰ ๋ฐฉ๋ฒ•"],
285
- "prompt": "๊ธฐ์กด ๋ฐฉ์‹์„ ๋„˜์–ด์„  ์ฐฝ์˜์ ์ด๊ณ  ํ˜์‹ ์ ์ธ ์ ‘๊ทผ์„ ์ œ์‹œํ•˜์„ธ์š”."
 
 
 
 
 
 
286
  },
287
- "critical_analysis": {
288
- "steps": ["ํ˜„ํ™ฉ ํ‰๊ฐ€", "๊ฐ•์ /์•ฝ์ ", "๊ธฐํšŒ/์œ„ํ˜‘", "๊ฐœ์„  ๋ฐฉํ–ฅ"],
289
- "prompt": "๋น„ํŒ์  ๊ด€์ ์—์„œ ์ฒ ์ €ํžˆ ๋ถ„์„ํ•˜๊ณ  ๊ฐœ์„ ์ ์„ ๋„์ถœํ•˜์„ธ์š”."
 
 
 
 
 
 
 
 
 
 
290
  }
291
  }
292
 
293
- def get_reasoning_structure(self, query_type: str) -> Dict:
294
  """์ฟผ๋ฆฌ ์œ ํ˜•์— ๋งž๋Š” ์ถ”๋ก  ๊ตฌ์กฐ ๋ฐ˜ํ™˜"""
295
- # ๊ธฐ๋ณธ๊ฐ’์€ problem_solving
296
- return self.templates.get(query_type, self.templates["problem_solving"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
 
299
  # ============================================================================
300
- # ์กฐ๊ธฐ ์ข…๋ฃŒ ๋ฉ”์ปค๋‹ˆ์ฆ˜
301
  # ============================================================================
302
 
303
  class QualityChecker:
@@ -312,16 +363,27 @@ class QualityChecker:
312
  "clarity": 0.2
313
  }
314
 
315
- def evaluate_response(self, response: str, query: str) -> Tuple[float, bool]:
316
- """์‘๋‹ต ํ’ˆ์งˆ ํ‰๊ฐ€"""
317
  scores = {}
318
 
 
 
 
 
319
  # ๊ธธ์ด ํ‰๊ฐ€
320
- scores["length"] = min(len(response) / 1000, 1.0) # 1000์ž ๊ธฐ์ค€
321
 
322
- # ๊ตฌ์กฐ ํ‰๊ฐ€
323
- structure_markers = ["1.", "2.", "โ€ข", "-", "์ฒซ์งธ", "๋‘˜์งธ", "๊ฒฐ๋ก ", "์š”์•ฝ"]
324
- scores["structure"] = sum(1 for m in structure_markers if m in response) / len(structure_markers)
 
 
 
 
 
 
 
325
 
326
  # ์™„์ „์„ฑ ํ‰๊ฐ€ (์ฟผ๋ฆฌ ํ‚ค์›Œ๋“œ ํฌํ•จ ์—ฌ๋ถ€)
327
  query_words = set(query.split())
@@ -329,9 +391,16 @@ class QualityChecker:
329
  scores["completeness"] = len(query_words & response_words) / max(len(query_words), 1)
330
 
331
  # ๋ช…ํ™•์„ฑ ํ‰๊ฐ€ (๋ฌธ์žฅ ๊ตฌ์กฐ)
332
- sentences = response.split('.')
 
 
 
 
 
 
 
333
  avg_sentence_length = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
334
- scores["clarity"] = min(avg_sentence_length / 20, 1.0) # 20๋‹จ์–ด ๊ธฐ์ค€
335
 
336
  # ๊ฐ€์ค‘ ํ‰๊ท  ๊ณ„์‚ฐ
337
  total_score = sum(
@@ -345,29 +414,41 @@ class QualityChecker:
345
 
346
 
347
  # ============================================================================
348
- # ์ŠคํŠธ๋ฆฌ๋ฐ ์ตœ์ ํ™”
349
  # ============================================================================
350
 
351
  class OptimizedStreaming:
352
- """์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™”"""
353
 
354
- def __init__(self, chunk_size: int = 100, flush_interval: float = 0.1):
355
  self.chunk_size = chunk_size
356
  self.flush_interval = flush_interval
357
  self.buffer = ""
358
  self.last_flush = time.time()
 
359
 
360
  async def buffer_and_yield(
361
  self,
362
- stream: AsyncGenerator[str, None]
 
363
  ) -> AsyncGenerator[str, None]:
364
- """๋ฒ„ํผ๋ง๋œ ์ŠคํŠธ๋ฆฌ๋ฐ"""
365
 
 
366
  async for chunk in stream:
367
  self.buffer += chunk
368
  current_time = time.time()
 
369
 
370
- if (len(self.buffer) >= self.chunk_size or
 
 
 
 
 
 
 
 
371
  current_time - self.last_flush >= self.flush_interval):
372
 
373
  yield self.buffer
@@ -380,85 +461,212 @@ class OptimizedStreaming:
380
 
381
 
382
  # ============================================================================
383
- # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  # ============================================================================
385
 
386
  class SpeedOptimizedMultiAgentSystem:
387
- """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ"""
388
 
389
  def __init__(self):
390
  self.llm = OptimizedFireworksClient()
391
  self.search = AsyncBraveSearch()
392
- self.cache = SmartCache()
393
  self.reasoning = LightweightReasoningChain()
394
  self.quality_checker = QualityChecker()
395
  self.streaming = OptimizedStreaming()
396
-
397
- # ์ปดํŒฉํŠธ ํ”„๋กฌํ”„ํŠธ
398
- self.compact_prompts = self._init_compact_prompts()
399
 
400
  # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
401
  self.executor = ThreadPoolExecutor(max_workers=4)
402
 
403
- def _init_compact_prompts(self) -> Dict:
404
- """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ"""
405
- return {
406
- AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
 
407
  ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
408
  ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
409
  ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
410
-
411
- AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
412
  ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
413
  ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
414
  ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
415
-
416
- AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
417
  ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
418
  ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
419
  ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
420
-
421
- AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
422
  ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
423
  ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
424
- ํ•ต์‹ฌ๋จผ์ €+์ƒ์„ธ๋Š”ํ›„์ˆœ์œ„"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  }
 
 
426
 
427
  async def parallel_process_agents(
428
  self,
429
  query: str,
430
  search_results: List[Dict],
431
- show_progress: bool = True
 
432
  ) -> AsyncGenerator[Tuple[str, str], None]:
433
- """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ"""
434
 
435
  start_time = time.time()
 
 
 
 
 
 
 
 
436
  search_context = self._format_search_results(search_results)
437
  accumulated_response = ""
438
  agent_thoughts = ""
439
 
440
- # ์บ์‹œ ํ™•์ธ
441
- cached = self.cache.get(query)
442
- if cached:
443
- yield cached["response"], "โœจ ์บ์‹œ์—์„œ ์ฆ‰์‹œ ๋กœ๋“œ"
444
- return
445
-
446
  # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
447
- reasoning_pattern = self.cache.get_reasoning_pattern(query)
448
 
449
  try:
450
  # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
451
  if show_progress:
452
- agent_thoughts = "### ๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n"
453
- agent_thoughts += "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n"
 
 
 
 
 
454
  yield accumulated_response, agent_thoughts
455
 
456
- # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ
457
- supervisor_prompt = f"""
 
458
  ์งˆ๋ฌธ: {query}
459
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
460
  ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
461
- ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  supervisor_response = ""
464
  supervisor_task = self.llm.chat_stream_async(
@@ -474,23 +682,54 @@ class SpeedOptimizedMultiAgentSystem:
474
  async for chunk in self.streaming.buffer_and_yield(supervisor_task):
475
  supervisor_response += chunk
476
  if show_progress and len(supervisor_response) < 300:
477
- agent_thoughts = f"### ๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„\n{supervisor_response[:300]}...\n\n"
 
 
 
 
 
 
478
  yield accumulated_response, agent_thoughts
479
 
480
  # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
481
  if show_progress:
482
- agent_thoughts += "### ๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n"
 
 
 
 
 
 
483
  yield accumulated_response, agent_thoughts
484
 
485
- # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘
486
- creative_prompt = f"""
 
487
  ์งˆ๋ฌธ: {query}
488
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
489
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
490
- ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
  creative_response = ""
493
- creative_partial = "" # ๋น„ํ‰์ž์šฉ ๋ถ€๋ถ„ ์‘๋‹ต
494
  critic_started = False
495
  critic_response = ""
496
 
@@ -512,11 +751,27 @@ class SpeedOptimizedMultiAgentSystem:
512
  if len(creative_partial) > 500 and not critic_started:
513
  critic_started = True
514
 
515
- # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘
516
- critic_prompt = f"""
 
517
  ์›๋ณธ์งˆ๋ฌธ: {query}
518
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
519
- ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  critic_task = asyncio.create_task(
522
  self._run_critic_async(critic_prompt)
@@ -524,7 +779,13 @@ class SpeedOptimizedMultiAgentSystem:
524
 
525
  if show_progress:
526
  display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
527
- agent_thoughts = f"### ๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž\n{display_creative}\n\n"
 
 
 
 
 
 
528
  yield accumulated_response, agent_thoughts
529
 
530
  # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
@@ -532,41 +793,76 @@ class SpeedOptimizedMultiAgentSystem:
532
  critic_response = await critic_task
533
 
534
  if show_progress:
535
- agent_thoughts += f"### ๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† \n{critic_response[:200]}...\n\n"
 
 
 
 
 
 
536
  yield accumulated_response, agent_thoughts
537
 
538
  # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
539
  quality_score, need_more = self.quality_checker.evaluate_response(
540
- creative_response, query
541
  )
542
 
543
  if not need_more and quality_score > 0.85:
544
  # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
545
- accumulated_response = creative_response
546
 
547
  if show_progress:
548
- agent_thoughts += f"### โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n"
549
-
550
- # ์บ์‹œ ์ €์žฅ
551
- self.cache.set(query, {
552
- "response": accumulated_response,
553
- "timestamp": datetime.now()
554
- })
555
 
556
  yield accumulated_response, agent_thoughts
557
  return
558
 
559
  # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
560
  if show_progress:
561
- agent_thoughts += "### โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n"
 
 
 
 
 
 
562
  yield accumulated_response, agent_thoughts
563
 
564
- final_prompt = f"""
 
 
565
  ์งˆ๋ฌธ: {query}
566
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
567
  ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
568
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
569
- ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  final_task = self.llm.chat_stream_async(
572
  messages=[
@@ -579,28 +875,39 @@ class SpeedOptimizedMultiAgentSystem:
579
 
580
  # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
581
  accumulated_response = ""
582
- async for chunk in self.streaming.buffer_and_yield(final_task):
 
583
  accumulated_response += chunk
584
- yield accumulated_response, agent_thoughts
 
 
585
 
586
- # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€
587
- processing_time = time.time() - start_time
588
- accumulated_response += f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ"
589
 
590
- # ์บ์‹œ ์ €์žฅ
591
- self.cache.set(query, {
592
- "response": accumulated_response,
593
- "timestamp": datetime.now()
594
- })
 
 
 
 
595
 
596
  yield accumulated_response, agent_thoughts
597
 
598
  except Exception as e:
599
- error_msg = f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
600
- yield error_msg, agent_thoughts
 
 
 
 
 
601
 
602
  async def _run_critic_async(self, prompt: str) -> str:
603
- """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰"""
604
  try:
605
  response = ""
606
  async for chunk in self.llm.chat_stream_async(
@@ -613,27 +920,37 @@ class SpeedOptimizedMultiAgentSystem:
613
  ):
614
  response += chunk
615
  return response
616
- except:
617
- return "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜"
 
 
 
 
 
 
 
 
618
 
619
  def _format_search_results(self, results: List[Dict]) -> str:
620
  """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
621
  if not results:
622
- return "๊ฒ€์ƒ‰๊ฒฐ๊ณผ์—†์Œ"
623
 
624
  formatted = []
625
- for i, r in enumerate(results[:3], 1): # ์ƒ์œ„ 3๊ฐœ๋งŒ
626
- formatted.append(f"[{i}]{r.get('title','')[:50]}:{r.get('description','')[:100]}")
 
 
627
 
628
  return " | ".join(formatted)
629
 
630
 
631
  # ============================================================================
632
- # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „)
633
  # ============================================================================
634
 
635
  def create_optimized_gradio_interface():
636
- """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค"""
637
 
638
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
639
  system = SpeedOptimizedMultiAgentSystem()
@@ -643,57 +960,67 @@ def create_optimized_gradio_interface():
643
  history: List[Dict],
644
  use_search: bool,
645
  show_agent_thoughts: bool,
646
- search_count: int
 
647
  ):
648
- """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ๋™๊ธฐ ๋ฒ„์ „"""
649
 
650
  if not message:
651
  yield history, "", ""
652
  return
653
 
 
 
 
 
 
 
 
654
  # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
655
  try:
656
  import nest_asyncio
657
  nest_asyncio.apply()
658
  except ImportError:
659
- pass # nest_asyncio๊ฐ€ ์—†์–ด๋„ ์ง„ํ–‰
660
-
661
- def run_async_function(coro):
662
- """๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰ํ•˜๋Š” ํ—ฌํผ"""
663
- try:
664
- loop = asyncio.get_event_loop()
665
- if loop.is_running():
666
- # ์ด๋ฏธ ์‹คํ–‰ ์ค‘์ธ ๋ฃจํ”„๊ฐ€ ์žˆ์œผ๋ฉด ์ƒˆ ์Šค๋ ˆ๋“œ์—์„œ ์‹คํ–‰
667
- import concurrent.futures
668
- with concurrent.futures.ThreadPoolExecutor() as executor:
669
- future = executor.submit(asyncio.run, coro)
670
- return future.result()
671
- else:
672
- return loop.run_until_complete(coro)
673
- except RuntimeError:
674
- # ๋ฃจํ”„๊ฐ€ ์—†์œผ๋ฉด ์ƒˆ๋กœ ์ƒ์„ฑ
675
- return asyncio.run(coro)
676
 
677
  try:
678
  # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
679
  search_results = []
680
  search_display = ""
681
 
 
 
 
682
  if use_search:
683
  # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
 
 
 
 
 
 
684
  history_with_message = history + [
685
  {"role": "user", "content": message},
686
- {"role": "assistant", "content": "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘..."}
687
  ]
688
  yield history_with_message, "", ""
689
 
690
  # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
691
- search_results = run_async_function(
692
- system.search.search_async(message, count=search_count)
693
- )
 
 
 
694
 
695
  if search_results:
696
- search_display = "## ๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ\n\n"
 
 
 
 
 
 
697
  for i, result in enumerate(search_results[:3], 1):
698
  search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
699
  search_display += f" {result['description'][:100]}...\n\n"
@@ -701,37 +1028,55 @@ def create_optimized_gradio_interface():
701
  # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
702
  current_history = history + [{"role": "user", "content": message}]
703
 
704
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹คํ–‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์ˆ˜์ง‘
705
- async def collect_responses():
706
- responses = []
707
  async for response, thoughts in system.parallel_process_agents(
708
  query=message,
709
  search_results=search_results,
710
- show_progress=show_agent_thoughts
 
711
  ):
712
- responses.append((response, thoughts))
713
- return responses
714
 
715
- # ๋ชจ๋“  ์‘๋‹ต ์ˆ˜์ง‘
716
- all_responses = run_async_function(collect_responses())
 
717
 
718
- # ์ˆ˜์ง‘๋œ ์‘๋‹ต์„ yield
719
- for response, thoughts in all_responses:
720
- updated_history = current_history + [
721
- {"role": "assistant", "content": response}
722
- ]
723
- yield updated_history, thoughts, search_display
 
 
 
 
 
 
 
 
 
 
 
724
 
725
  except Exception as e:
726
  error_history = history + [
727
  {"role": "user", "content": message},
728
- {"role": "assistant", "content": f"โŒ ์˜ค๋ฅ˜: {str(e)}"}
729
  ]
730
  yield error_history, "", ""
 
 
 
 
 
 
731
 
732
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
733
  with gr.Blocks(
734
- title="โšก Speed-Optimized Multi-Agent System",
735
  theme=gr.themes.Soft(),
736
  css="""
737
  .gradio-container {
@@ -741,50 +1086,57 @@ def create_optimized_gradio_interface():
741
  """
742
  ) as demo:
743
  gr.Markdown("""
744
- # โšก ๊ณ ์† Multi-Agent RAG System
745
- ### ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ 5์ดˆ ์ด๋‚ด ์ฒ˜๋ฆฌ ๋ชฉํ‘œ
746
-
747
- **์ตœ์ ํ™” ๊ธฐ์ˆ :**
748
- - ๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ: ์—์ด์ „ํŠธ ๋™์‹œ ์‹คํ–‰
749
- - ๐Ÿ’พ ์Šค๋งˆํŠธ ์บ์‹ฑ: ์ž์ฃผ ๋ฌป๋Š” ํŒจํ„ด ์ฆ‰์‹œ ์‘๋‹ต
750
- - โšก ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ: ๋„คํŠธ์›Œํฌ ์ตœ์ ํ™”
751
- - ๐ŸŽฏ ์กฐ๊ธฐ ์ข…๋ฃŒ: ํ’ˆ์งˆ ์ถฉ์กฑ ์‹œ ์ฆ‰์‹œ ์™„๋ฃŒ
 
752
  """)
753
 
754
  with gr.Row():
755
  with gr.Column(scale=3):
756
  chatbot = gr.Chatbot(
757
  height=500,
758
- label="๐Ÿ’ฌ ๋Œ€ํ™”",
759
  type="messages"
760
  )
761
 
762
  msg = gr.Textbox(
763
- label="๋ณต์žกํ•œ ์งˆ๋ฌธ ์ž…๋ ฅ",
764
- placeholder="๋ถ„์„, ์ „๋žต, ์ฐฝ์˜์  ํ•ด๊ฒฐ์ด ํ•„์š”ํ•œ ๋ณต์žกํ•œ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...",
765
  lines=3
766
  )
767
 
768
  with gr.Row():
769
- submit = gr.Button("โšก ๊ณ ์† ์ฒ˜๋ฆฌ", variant="primary")
770
- clear = gr.Button("๐Ÿ”„ ์ดˆ๊ธฐํ™”")
771
 
772
- with gr.Accordion("๐Ÿค– ์—์ด์ „ํŠธ ์ฒ˜๋ฆฌ ๊ณผ์ •", open=False):
773
  agent_thoughts = gr.Markdown()
774
 
775
- with gr.Accordion("๐Ÿ“š ๊ฒ€์ƒ‰ ์†Œ์Šค", open=False):
776
  search_sources = gr.Markdown()
777
 
778
  with gr.Column(scale=1):
779
- gr.Markdown("### โš™๏ธ ์„ค์ •")
 
 
 
 
 
 
780
 
781
  use_search = gr.Checkbox(
782
- label="๐Ÿ” ์›น ๊ฒ€์ƒ‰ ์‚ฌ์šฉ",
783
  value=True
784
  )
785
 
786
  show_agent_thoughts = gr.Checkbox(
787
- label="๐Ÿง  ์ฒ˜๋ฆฌ ๊ณผ์ • ํ‘œ์‹œ",
788
  value=True
789
  )
790
 
@@ -793,33 +1145,40 @@ def create_optimized_gradio_interface():
793
  maximum=10,
794
  value=5,
795
  step=1,
796
- label="๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ˆ˜"
797
  )
798
 
799
  gr.Markdown("""
800
- ### โšก ์ตœ์ ํ™” ์ƒํƒœ
801
-
802
- **ํ™œ์„ฑํ™”๋œ ์ตœ์ ํ™”:**
803
- - โœ… ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ
804
- - โœ… ์Šค๋งˆํŠธ ์บ์‹ฑ
805
- - โœ… ๋ฒ„ํผ ์ŠคํŠธ๋ฆฌ๋ฐ
806
- - โœ… ์กฐ๊ธฐ ์ข…๋ฃŒ
807
- - โœ… ์••์ถ• ํ”„๋กฌํ”„ํŠธ
808
-
809
- **์˜ˆ์ƒ ์ฒ˜๋ฆฌ ์‹œ๊ฐ„:**
810
- - ์บ์‹œ ํžˆํŠธ: < 1์ดˆ
811
- - ์ผ๋ฐ˜ ์งˆ๋ฌธ: 3-5์ดˆ
812
- - ๋ณต์žกํ•œ ์งˆ๋ฌธ: 5-8์ดˆ
 
 
813
  """)
814
 
815
- # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ
816
  gr.Examples(
817
  examples=[
 
818
  "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
819
  "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
820
- "๊ธฐํ›„๋ณ€ํ™” ๋Œ€์‘์„ ์œ„ํ•œ ์ฐฝ์˜์ ์ธ ๋น„์ฆˆ๋‹ˆ์Šค ๋ชจ๋ธ 5๊ฐ€์ง€๋ฅผ ๊ตฌ์ฒด์ ์œผ๋กœ ์„ค๊ณ„ํ•ด์ค˜",
821
- "์–‘์ž์ปดํ“จํ„ฐ๊ฐ€ ํ˜„์žฌ ์•”ํ˜ธํ™” ์ฒด๊ณ„์— ๋ฏธ์น  ์˜ํ–ฅ๊ณผ ๋Œ€์•ˆ์„ ๊ธฐ์ˆ ์ ์œผ๋กœ ๋ถ„์„ํ•ด์ค˜",
822
- "๋ฉ”ํƒ€๋ฒ„์Šค ์‹œ๋Œ€์˜ ๊ต์œก ํ˜์‹  ๋ฐฉ์•ˆ์„ ์‹ค์ œ ๊ตฌํ˜„ ๊ฐ€๋Šฅํ•œ ์ˆ˜์ค€์œผ๋กœ ์ œ์•ˆํ•ด์ค˜"
 
 
 
 
823
  ],
824
  inputs=msg
825
  )
@@ -827,7 +1186,7 @@ def create_optimized_gradio_interface():
827
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
828
  submit.click(
829
  process_query_optimized,
830
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
831
  outputs=[chatbot, agent_thoughts, search_sources]
832
  ).then(
833
  lambda: "",
@@ -837,7 +1196,7 @@ def create_optimized_gradio_interface():
837
 
838
  msg.submit(
839
  process_query_optimized,
840
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
841
  outputs=[chatbot, agent_thoughts, search_sources]
842
  ).then(
843
  lambda: "",
@@ -861,25 +1220,25 @@ def create_optimized_gradio_interface():
861
  if __name__ == "__main__":
862
  print("""
863
  โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
864
- โ•‘ โšก Speed-Optimized Multi-Agent RAG System โšก โ•‘
865
  โ•‘ โ•‘
866
- โ•‘ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ 5์ดˆ ์ด๋‚ด ์ฒ˜๋ฆฌํ•˜๋Š” ๊ณ ์† AI ์‹œ์Šคํ…œ โ•‘
867
  โ•‘ โ•‘
868
- โ•‘ ์ตœ์ ํ™” ๊ธฐ์ˆ : โ•‘
869
- โ•‘ โ€ข ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ โ•‘
870
- โ•‘ โ€ข ์Šค๋งˆํŠธ ์บ์‹ฑ ์‹œ์Šคํ…œ โ•‘
871
- โ•‘ โ€ข ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™” โ•‘
872
- โ•‘ โ€ข ํ’ˆ์งˆ ๊ธฐ๋ฐ˜ ์กฐ๊ธฐ ์ข…๋ฃŒ โ•‘
873
- โ•‘ โ€ข ์••์ถ• ํ”„๋กฌํ”„ํŠธ ์—”์ง€๋‹ˆ์–ด๋ง โ•‘
874
  โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
875
  """)
876
 
877
  # API ํ‚ค ํ™•์ธ
878
  if not os.getenv("FIREWORKS_API_KEY"):
879
- print("\nโš ๏ธ FIREWORKS_API_KEY๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
880
 
881
  if not os.getenv("BRAVE_SEARCH_API_KEY"):
882
- print("\nโš ๏ธ BRAVE_SEARCH_API_KEY๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
883
 
884
  # Gradio ์•ฑ ์‹คํ–‰
885
  demo = create_optimized_gradio_interface()
@@ -887,8 +1246,8 @@ if __name__ == "__main__":
887
  is_hf_spaces = os.getenv("SPACE_ID") is not None
888
 
889
  if is_hf_spaces:
890
- print("\n๐Ÿค— Hugging Face Spaces์—์„œ ์ตœ์ ํ™” ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘...")
891
  demo.launch(server_name="0.0.0.0", server_port=7860)
892
  else:
893
- print("\n๐Ÿ’ป ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ ์ตœ์ ํ™” ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘...")
894
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
1
  """
2
  โšก Speed-Optimized Multi-Agent RAG System for Complex Questions
3
+ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ, ๋™์  ํŒŒ์ดํ”„๋ผ์ธ์œผ๋กœ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ ๋น ๋ฅด๊ฒŒ ์ฒ˜๋ฆฌ
4
+ Enhanced with multi-language support and improved error handling
5
+ (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ ๋ฒ„์ „)
6
  """
7
 
8
  import os
 
10
  import time
11
  import asyncio
12
  import hashlib
13
+ import re
14
+ import sys
15
  from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
16
  from datetime import datetime, timedelta
17
  from enum import Enum
 
63
 
64
 
65
  # ============================================================================
66
+ # ์–ธ์–ด ๊ฐ์ง€ ์œ ํ‹ธ๋ฆฌํ‹ฐ
67
  # ============================================================================
68
 
69
class LanguageDetector:
    """Heuristic script-based language identification for short user queries.

    Distinguishes Korean / Japanese / Chinese / English by the Unicode
    script of the characters present; anything unrecognised falls back
    to ``'en'``.
    """

    # Hangul syllables U+AC00-U+D7A3 (equivalent to the literal range 가-힣).
    _KOREAN = re.compile(r'[\uac00-\ud7a3]+')
    # Hiragana runs (U+3041-U+3093) or katakana runs (U+30A1-U+30F4,
    # plus the long-vowel mark U+30FC).
    _JAPANESE = re.compile(r'[\u3041-\u3093]+|[\u30a1-\u30f4\u30fc]+')
    # CJK Unified Ideographs.
    _CHINESE = re.compile(r'[\u4e00-\u9fff]+')

    @staticmethod
    def detect_language(text: str) -> str:
        """Return ``'ko'``, ``'ja'``, ``'zh'`` or ``'en'`` for *text*.

        Bug fix vs. the original: character counts are now the sum of the
        lengths of the matched runs.  ``len(pattern.findall(text))`` counts
        the number of *runs*, not characters, which under-counted Korean in
        mixed-language text (e.g. one long Hangul run inside an English
        sentence) and misrouted such queries to ``'en'``.
        """
        text_length = len(text)
        if text_length == 0:
            return 'en'

        korean_chars = sum(len(run) for run in LanguageDetector._KOREAN.findall(text))
        japanese_chars = sum(len(run) for run in LanguageDetector._JAPANESE.findall(text))
        chinese_chars = sum(len(run) for run in LanguageDetector._CHINESE.findall(text))

        # Korean wins when Hangul makes up more than 10% of the text.
        if korean_chars > 0 and (korean_chars / text_length > 0.1):
            return 'ko'
        # Any kana at all marks the text as Japanese.
        elif japanese_chars > 0:
            return 'ja'
        # Otherwise any CJK ideograph marks it as Chinese
        # (kanji-only Japanese will land here too — inherent to the heuristic).
        elif chinese_chars > 0:
            return 'zh'
        else:
            return 'en'
104
 
105
 
106
  # ============================================================================
107
+ # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์ตœ์ ํ™” Brave Search (๊ฐœ์„ ๋จ)
108
  # ============================================================================
109
 
110
  class AsyncBraveSearch:
111
+ """๋น„๋™๊ธฐ Brave ๊ฒ€์ƒ‰ ํด๋ผ์ด์–ธํŠธ with retry logic"""
112
 
113
  def __init__(self, api_key: Optional[str] = None):
114
  self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY")
115
  self.base_url = "https://api.search.brave.com/res/v1/web/search"
116
+ self.max_retries = 3
117
 
118
+ async def search_async(self, query: str, count: int = 5, lang: str = 'ko') -> List[Dict]:
119
+ """๋น„๋™๊ธฐ ๊ฒ€์ƒ‰ with retry"""
120
  if not self.api_key:
121
  return []
122
 
 
125
  "X-Subscription-Token": self.api_key
126
  }
127
 
128
+ # ์–ธ์–ด๋ณ„ ํŒŒ๋ผ๋ฏธํ„ฐ ์„ค์ •
129
+ lang_params = {
130
+ 'ko': {"search_lang": "ko", "country": "KR"},
131
+ 'en': {"search_lang": "en", "country": "US"},
132
+ 'ja': {"search_lang": "ja", "country": "JP"},
133
+ 'zh': {"search_lang": "zh", "country": "CN"}
134
+ }
135
+
136
  params = {
137
  "q": query,
138
  "count": count,
139
  "text_decorations": False,
140
+ **lang_params.get(lang, lang_params['en'])
 
141
  }
142
 
143
+ for attempt in range(self.max_retries):
144
+ try:
145
+ async with aiohttp.ClientSession() as session:
146
+ async with session.get(
147
+ self.base_url,
148
+ headers=headers,
149
+ params=params,
150
+ timeout=aiohttp.ClientTimeout(total=5)
151
+ ) as response:
152
+ if response.status == 200:
153
+ data = await response.json()
154
+
155
+ results = []
156
+ if "web" in data and "results" in data["web"]:
157
+ for item in data["web"]["results"][:count]:
158
+ results.append({
159
+ "title": item.get("title", ""),
160
+ "url": item.get("url", ""),
161
+ "description": item.get("description", ""),
162
+ "age": item.get("age", "")
163
+ })
164
+
165
+ return results
166
+ elif response.status == 429: # Rate limit
167
+ await asyncio.sleep(2 ** attempt)
168
+ continue
169
+ except aiohttp.ClientError as e:
170
+ if attempt < self.max_retries - 1:
171
+ await asyncio.sleep(2 ** attempt) # Exponential backoff
172
+ continue
173
+ except Exception:
174
+ pass
175
 
176
  return []
177
+
178
async def batch_search(self, queries: List[str], lang: str = 'ko') -> List[List[Dict]]:
    """Fan several queries out concurrently and gather their hit lists.

    A query whose coroutine raises is mapped to an empty hit list, so a
    single failing search never aborts the whole batch.
    """
    pending = [self.search_async(query, lang=lang) for query in queries]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)
    hit_lists = []
    for outcome in outcomes:
        hit_lists.append([] if isinstance(outcome, Exception) else outcome)
    return hit_lists
184
 
185
 
186
  # ============================================================================
187
+ # ์ตœ์ ํ™”๋œ Fireworks ํด๋ผ์ด์–ธํŠธ (๊ฐœ์„ ๋จ)
188
  # ============================================================================
189
 
190
  class OptimizedFireworksClient:
191
+ """์ตœ์ ํ™”๋œ LLM ํด๋ผ์ด์–ธํŠธ with language support"""
192
 
193
  def __init__(self, api_key: Optional[str] = None):
194
  self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
 
204
 
205
  # ํ•ญ์ƒ ์ตœ๊ณ  ์„ฑ๋Šฅ ๋ชจ๋ธ ์‚ฌ์šฉ (๋ณต์žกํ•œ ์งˆ๋ฌธ ์ „์ œ)
206
  self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
207
+ self.max_retries = 3
208
+
209
def compress_prompt(self, text: str, max_length: int = 2000) -> str:
    """Compress *text* to at most roughly ``max_length`` characters.

    Keeps whole sentences (split on ``'.'``) from the start of the text
    until the character budget is exhausted.

    Bug fix vs. the original: when even the first sentence exceeded the
    budget, the loop kept nothing and an *empty string* was returned,
    silently dropping the entire prompt.  We now fall back to a hard
    character truncation so non-empty input always yields non-empty output.
    """
    if len(text) <= max_length:
        return text

    sentences = text.split('.')
    kept = []
    used = 0
    for sentence in sentences:
        if used + len(sentence) > max_length:
            break
        kept.append(sentence)
        used += len(sentence)

    if not kept:
        # First sentence alone is over budget -> hard truncate instead of
        # returning an empty string.
        return text[:max_length]
    return '.'.join(kept)
226
 
227
  async def chat_stream_async(
228
  self,
229
  messages: List[Dict],
230
  **kwargs
231
  ) -> AsyncGenerator[str, None]:
232
+ """๋น„๋™๊ธฐ ์ŠคํŠธ๋ฆฌ๋ฐ ๋Œ€ํ™” with retry"""
233
 
234
  payload = {
235
  "model": self.model,
 
241
  "stream": True
242
  }
243
 
244
+ for attempt in range(self.max_retries):
245
+ try:
246
+ async with aiohttp.ClientSession() as session:
247
+ async with session.post(
248
+ self.base_url,
249
+ headers={**self.headers, "Accept": "text/event-stream"},
250
+ json=payload,
251
+ timeout=aiohttp.ClientTimeout(total=30)
252
+ ) as response:
253
+ async for line in response.content:
254
+ line_str = line.decode('utf-8').strip()
255
+ if line_str.startswith("data: "):
256
+ data_str = line_str[6:]
257
+ if data_str == "[DONE]":
258
+ break
259
+ try:
260
+ data = json.loads(data_str)
261
+ if "choices" in data and len(data["choices"]) > 0:
262
+ delta = data["choices"][0].get("delta", {})
263
+ if "content" in delta:
264
+ yield delta["content"]
265
+ except json.JSONDecodeError:
266
+ continue
267
+ return # Success
268
+ except aiohttp.ClientError as e:
269
+ if attempt < self.max_retries - 1:
270
+ await asyncio.sleep(2 ** attempt)
271
+ continue
272
+ else:
273
+ yield f"Error after {self.max_retries} attempts: {str(e)}"
274
+ except Exception as e:
275
+ yield f"Unexpected error: {str(e)}"
276
+ break
277
 
278
 
279
  # ============================================================================
280
+ # ๊ฒฝ๋Ÿ‰ํ™”๋œ ์ถ”๋ก  ์ฒด์ธ (๋‹ค๊ตญ์–ด ์ง€์›)
281
  # ============================================================================
282
 
283
class LightweightReasoningChain:
    """Static store of reasoning-structure templates, keyed by language.

    Only 'ko' and 'en' templates exist; 'ja'/'zh' callers fall back to 'en'.
    (NOTE(review): the original class docstring line is not visible in this
    chunk; this summary is reconstructed from the visible code.)
    """

    def __init__(self):
        # Per-language reasoning templates: each entry carries the ordered
        # step labels plus the instruction sentence injected into prompts.
        self.templates = {
            "ko": {
                "problem_solving": {
                    "steps": ["๋ฌธ์ œ ๋ถ„ํ•ด", "ํ•ต์‹ฌ ์š”์ธ", "ํ•ด๊ฒฐ ๋ฐฉ์•ˆ", "๊ตฌํ˜„ ์ „๋žต"],
                    "prompt": "์ฒด๊ณ„์ ์œผ๋กœ ๋‹จ๊ณ„๋ณ„๋กœ ๋ถ„์„ํ•˜๊ณ  ํ•ด๊ฒฐ์ฑ…์„ ์ œ์‹œํ•˜์„ธ์š”."
                },
                "creative_thinking": {
                    "steps": ["๊ธฐ์กด ์ ‘๊ทผ", "์ฐฝ์˜์  ๋Œ€์•ˆ", "ํ˜์‹  ํฌ์ธํŠธ", "์‹คํ–‰ ๋ฐฉ๋ฒ•"],
                    "prompt": "๊ธฐ์กด ๋ฐฉ์‹์„ ๋„˜์–ด์„  ์ฐฝ์˜์ ์ด๊ณ  ํ˜์‹ ์ ์ธ ์ ‘๊ทผ์„ ์ œ์‹œํ•˜์„ธ์š”."
                },
                "critical_analysis": {
                    "steps": ["ํ˜„ํ™ฉ ํ‰๊ฐ€", "๊ฐ•์ /์•ฝ์ ", "๊ธฐํšŒ/์œ„ํ˜‘", "๊ฐœ์„  ๋ฐฉํ–ฅ"],
                    "prompt": "๋น„ํŒ์  ๊ด€์ ์—์„œ ์ฒ ์ €ํžˆ ๋ถ„์„ํ•˜๊ณ  ๊ฐœ์„ ์ ์„ ๋„์ถœํ•˜์„ธ์š”."
                }
            },
            "en": {
                "problem_solving": {
                    "steps": ["Problem Breakdown", "Key Factors", "Solutions", "Implementation Strategy"],
                    "prompt": "Systematically analyze step by step and provide solutions."
                },
                "creative_thinking": {
                    "steps": ["Traditional Approach", "Creative Alternatives", "Innovation Points", "Execution Method"],
                    "prompt": "Provide creative and innovative approaches beyond conventional methods."
                },
                "critical_analysis": {
                    "steps": ["Current Assessment", "Strengths/Weaknesses", "Opportunities/Threats", "Improvement Direction"],
                    "prompt": "Thoroughly analyze from a critical perspective and derive improvements."
                }
            }
        }

    def get_reasoning_structure(self, query_type: str, lang: str = 'ko') -> Dict:
        """Return the reasoning structure for *query_type* in *lang*.

        Unknown languages fall back to 'en'; unknown query types fall back
        to 'problem_solving'.
        """
        lang_templates = self.templates.get(lang, self.templates['en'])
        return lang_templates.get(query_type, lang_templates["problem_solving"])

    def get_reasoning_pattern(self, query: str, lang: str = 'ko') -> Optional[Dict]:
        """Return the reasoning structure whose cue words appear in *query*.

        Despite the Optional annotation, this never returns None — it falls
        back to the 'problem_solving' structure when nothing matches.
        """
        query_lower = query.lower()

        # Per-language cue-word map; only 'ko'/'en' keys exist, others use 'en'.
        pattern_keywords = {
            'ko': {
                'problem_solving': ['ํ•ด๊ฒฐ', '๋ฐฉ๋ฒ•', '์ „๋žต', '๊ณ„ํš'],
                'creative_thinking': ['์ฐฝ์˜์ ', 'ํ˜์‹ ์ ', '์ƒˆ๋กœ์šด', '์•„์ด๋””์–ด'],
                'critical_analysis': ['๋ถ„์„', 'ํ‰๊ฐ€', '๋น„๊ต', '์˜ํ–ฅ']
            },
            'en': {
                'problem_solving': ['solve', 'solution', 'strategy', 'plan'],
                'creative_thinking': ['creative', 'innovative', 'novel', 'idea'],
                'critical_analysis': ['analyze', 'evaluate', 'compare', 'impact']
            }
        }

        keywords = pattern_keywords.get(lang, pattern_keywords['en'])

        # First pattern (in insertion order) with any cue word wins.
        for pattern_type, words in keywords.items():
            if any(word in query_lower for word in words):
                return self.get_reasoning_structure(pattern_type, lang)

        return self.get_reasoning_structure('problem_solving', lang)
348
 
349
 
350
  # ============================================================================
351
+ # ์กฐ๊ธฐ ์ข…๋ฃŒ ๋ฉ”์ปค๋‹ˆ์ฆ˜ (๊ฐœ์„ ๋จ)
352
  # ============================================================================
353
 
354
  class QualityChecker:
 
363
  "clarity": 0.2
364
  }
365
 
366
+ def evaluate_response(self, response: str, query: str, lang: str = 'ko') -> Tuple[float, bool]:
367
+ """์‘๋‹ต ํ’ˆ์งˆ ํ‰๊ฐ€ (์–ธ์–ด๋ณ„)"""
368
  scores = {}
369
 
370
+ # ์–ธ์–ด๋ณ„ ์ตœ์†Œ ๊ธธ์ด ๊ธฐ์ค€
371
+ min_length = {'ko': 500, 'en': 400, 'ja': 400, 'zh': 300}
372
+ target_length = min_length.get(lang, 400)
373
+
374
  # ๊ธธ์ด ํ‰๊ฐ€
375
+ scores["length"] = min(len(response) / target_length, 1.0)
376
 
377
+ # ๊ตฌ์กฐ ํ‰๊ฐ€ (์–ธ์–ด๋ณ„ ๋งˆ์ปค)
378
+ structure_markers = {
379
+ 'ko': ["1.", "2.", "โ€ข", "-", "์ฒซ์งธ", "๋‘˜์งธ", "๊ฒฐ๋ก ", "์š”์•ฝ"],
380
+ 'en': ["1.", "2.", "โ€ข", "-", "First", "Second", "Conclusion", "Summary"],
381
+ 'ja': ["1.", "2.", "โ€ข", "-", "็ฌฌไธ€", "็ฌฌไบŒ", "็ต่ซ–", "่ฆ็ด„"],
382
+ 'zh': ["1.", "2.", "โ€ข", "-", "็ฌฌไธ€", "็ฌฌไบŒ", "็ป“่ฎบ", "ๆ€ป็ป“"]
383
+ }
384
+
385
+ markers = structure_markers.get(lang, structure_markers['en'])
386
+ scores["structure"] = sum(1 for m in markers if m in response) / len(markers)
387
 
388
  # ์™„์ „์„ฑ ํ‰๊ฐ€ (์ฟผ๋ฆฌ ํ‚ค์›Œ๋“œ ํฌํ•จ ์—ฌ๋ถ€)
389
  query_words = set(query.split())
 
391
  scores["completeness"] = len(query_words & response_words) / max(len(query_words), 1)
392
 
393
  # ๋ช…ํ™•์„ฑ ํ‰๊ฐ€ (๋ฌธ์žฅ ๊ตฌ์กฐ)
394
+ sentence_delimiters = {
395
+ 'ko': '.',
396
+ 'en': '.',
397
+ 'ja': 'ใ€‚',
398
+ 'zh': 'ใ€‚'
399
+ }
400
+ delimiter = sentence_delimiters.get(lang, '.')
401
+ sentences = response.split(delimiter)
402
  avg_sentence_length = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
403
+ scores["clarity"] = min(avg_sentence_length / 20, 1.0)
404
 
405
  # ๊ฐ€์ค‘ ํ‰๊ท  ๊ณ„์‚ฐ
406
  total_score = sum(
 
414
 
415
 
416
  # ============================================================================
417
+ # ์ŠคํŠธ๋ฆฌ๋ฐ ์ตœ์ ํ™” (๊ฐœ์„ ๋จ)
418
  # ============================================================================
419
 
420
  class OptimizedStreaming:
421
+ """์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™” with adaptive buffering"""
422
 
423
+ def __init__(self, chunk_size: int = 20, flush_interval: float = 0.05):
424
  self.chunk_size = chunk_size
425
  self.flush_interval = flush_interval
426
  self.buffer = ""
427
  self.last_flush = time.time()
428
+ self.adaptive_size = chunk_size
429
 
430
  async def buffer_and_yield(
431
  self,
432
+ stream: AsyncGenerator[str, None],
433
+ adaptive: bool = True
434
  ) -> AsyncGenerator[str, None]:
435
+ """๋ฒ„ํผ๋ง๋œ ์ŠคํŠธ๋ฆฌ๋ฐ with adaptive sizing"""
436
 
437
+ chunk_count = 0
438
  async for chunk in stream:
439
  self.buffer += chunk
440
  current_time = time.time()
441
+ chunk_count += 1
442
 
443
+ # Adaptive chunk size based on stream speed
444
+ if adaptive and chunk_count % 10 == 0:
445
+ time_diff = current_time - self.last_flush
446
+ if time_diff < 0.02: # Too fast, increase buffer
447
+ self.adaptive_size = min(self.adaptive_size + 5, 100)
448
+ elif time_diff > 0.1: # Too slow, decrease buffer
449
+ self.adaptive_size = max(self.adaptive_size - 5, 10)
450
+
451
+ if (len(self.buffer) >= self.adaptive_size or
452
  current_time - self.last_flush >= self.flush_interval):
453
 
454
  yield self.buffer
 
461
 
462
 
463
  # ============================================================================
464
+ # ์‘๋‹ต ํ›„์ฒ˜๋ฆฌ ์œ ํ‹ธ๋ฆฌํ‹ฐ
465
+ # ============================================================================
466
+
467
class ResponseCleaner:
    """Post-processing helpers that strip markup noise from model output."""

    @staticmethod
    def clean_response(response: str) -> str:
        """Remove markdown headers, rules, and known junk fragments, then trim."""
        # 1) Markdown headers ("# ..." through "###### ...") at line starts.
        text = re.sub(r'^#{1,6}\s+', '', response, flags=re.MULTILINE)
        # 2) Bold/underscore runs and horizontal rules.
        text = re.sub(r'\*{2,}|_{2,}|-{3,}', '', text)
        # 3) Collapse runs of 3+ newlines into a single blank line.
        text = re.sub(r'\n{3,}', '\n\n', text)

        # 4) Specific junk fragments the agents are known to emit.
        junk_patterns = (
            r'\| --- # ๐ŸŒฑ \*\*์ตœ์ข…ํ†ตํ•ฉ ๋‹ต๋ณ€:',
            r'\*\*โ€“์˜ค๋ฅ˜: ---',
            r'^\s*\*\*\[.*?\]\*\*\s*',   # "[tag]"-style bold prefixes
            r'^\s*###\s*',
            r'^\s*##\s*',
            r'^\s*#\s*',
        )
        for junk in junk_patterns:
            text = re.sub(junk, '', text, flags=re.MULTILINE)

        return text.strip()
496
+
497
+
498
+ # ============================================================================
499
+ # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
500
  # ============================================================================
501
 
502
  class SpeedOptimizedMultiAgentSystem:
503
+ """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์—†์Œ)"""
504
 
505
  def __init__(self):
506
  self.llm = OptimizedFireworksClient()
507
  self.search = AsyncBraveSearch()
 
508
  self.reasoning = LightweightReasoningChain()
509
  self.quality_checker = QualityChecker()
510
  self.streaming = OptimizedStreaming()
511
+ self.language_detector = LanguageDetector()
512
+ self.response_cleaner = ResponseCleaner()
 
513
 
514
  # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
515
  self.executor = ThreadPoolExecutor(max_workers=4)
516
 
517
+ def _init_compact_prompts(self, lang: str = 'ko') -> Dict:
518
+ """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)"""
519
+ prompts = {
520
+ 'ko': {
521
+ AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
522
  ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
523
  ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
524
  ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
525
+
526
+ AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
527
  ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
528
  ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
529
  ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
530
+
531
+ AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
532
  ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
533
  ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
534
  ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
535
+
536
+ AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
537
  ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
538
  ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
539
+ ๋ฐ”๋กœ ํ•ต์‹ฌ ๋‚ด์šฉ๋ถ€ํ„ฐ ์‹œ์ž‘. ๋ถˆํ•„์š”ํ•œ ํ—ค๋”๋‚˜ ๋งˆํฌ์—… ์—†์ด. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€."""
540
+ },
541
+ 'en': {
542
+ AgentRole.SUPERVISOR: """[Supervisor-Structure]
543
+ Immediate analysis: core intent+required info+answer structure
544
+ Output: 5 key points (1 sentence each)
545
+ Clear reasoning framework""",
546
+
547
+ AgentRole.CREATIVE: """[Creative Generator]
548
+ Follow structure, expand creatively
549
+ Practical examples+innovative approach+specific advice
550
+ Remove unnecessary explanations""",
551
+
552
+ AgentRole.CRITIC: """[Critic-Verification]
553
+ Quick review: accuracy/logic/practicality
554
+ Only 3 improvement points
555
+ Max 2 sentences each""",
556
+
557
+ AgentRole.FINALIZER: """[Final Integration]
558
+ Synthesize all inputsโ†’optimal answer
559
+ Clear structure+practical info+creative balance
560
+ Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###)."""
561
+ },
562
+ 'ja': {
563
+ AgentRole.SUPERVISOR: """[็›ฃ็ฃ่€…-ๆง‹้€ ่จญ่จˆ]
564
+ ๅณๆ™‚ๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›ณ+ๅฟ…่ฆๆƒ…ๅ ฑ+ๅ›ž็ญ”ๆง‹้€ 
565
+ ๅ‡บๅŠ›๏ผš5ใคใฎๆ ธๅฟƒใƒใ‚คใƒณใƒˆ๏ผˆๅ„1ๆ–‡๏ผ‰
566
+ ๆŽจ่ซ–ไฝ“็ณปๆ˜Ž็คบ""",
567
+
568
+ AgentRole.CREATIVE: """[ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…]
569
+ ๅ…ฅๅŠ›ๆง‹้€ ใซๅพ“ใฃใฆๅ‰ต้€ ็š„ๆ‹กๅผต
570
+ ๅฎŸ็”จไพ‹+้ฉๆ–ฐ็š„ใ‚ขใƒ—ใƒญใƒผใƒ+ๅ…ทไฝ“็š„ใ‚ขใƒ‰ใƒใ‚คใ‚น
571
+ ไธ่ฆใช่ชฌๆ˜Žๅ‰Š้™ค""",
572
+
573
+ AgentRole.CRITIC: """[ๆ‰น่ฉ•่€…-ๆคœ่จผ]
574
+ ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผ๏ผšๆญฃ็ขบๆ€ง/่ซ–็†ๆ€ง/ๅฎŸ็”จๆ€ง
575
+ ๆ”นๅ–„ใƒใ‚คใƒณใƒˆ3ใคใฎใฟ
576
+ ๅ„2ๆ–‡ไปฅๅ†…""",
577
+
578
+ AgentRole.FINALIZER: """[ๆœ€็ต‚็ตฑๅˆ]
579
+ ๅ…จๆ„่ฆ‹็ตฑๅˆโ†’ๆœ€้ฉๅ›ž็ญ”
580
+ ๆ˜Ž็ขบๆง‹้€ +ๅฎŸ็”จๆƒ…ๅ ฑ+ๅ‰ต้€ ๆ€งใƒใƒฉใƒณใ‚น
581
+ ๆ ธๅฟƒๅ†…ๅฎนใ‹ใ‚‰็›ดๆŽฅ้–‹ๅง‹ใ€‚ไธ่ฆใชใƒ˜ใƒƒใƒ€ใƒผใ‚„ใƒžใƒผใ‚ฏใ‚ขใƒƒใƒ—ใชใ—ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚"""
582
+ },
583
+ 'zh': {
584
+ AgentRole.SUPERVISOR: """[ไธป็ฎก-็ป“ๆž„่ฎพ่ฎก]
585
+ ็ซ‹ๅณๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›พ+ๆ‰€้œ€ไฟกๆฏ+็ญ”ๆกˆ็ป“ๆž„
586
+ ่พ“ๅ‡บ๏ผš5ไธชๆ ธๅฟƒ่ฆ็‚น๏ผˆๆฏไธช1ๅฅ๏ผ‰
587
+ ๆŽจ็†ไฝ“็ณปๆ˜Ž็กฎ""",
588
+
589
+ AgentRole.CREATIVE: """[ๅˆ›ๆ„็”Ÿๆˆๅ™จ]
590
+ ๆŒ‰็ป“ๆž„ๅˆ›้€ ๆ€งๆ‰ฉๅฑ•
591
+ ๅฎž็”จ็คบไพ‹+ๅˆ›ๆ–ฐๆ–นๆณ•+ๅ…ทไฝ“ๅปบ่ฎฎ
592
+ ๅˆ ้™คไธๅฟ…่ฆ็š„่งฃ้‡Š""",
593
+
594
+ AgentRole.CRITIC: """[่ฏ„่ฎบๅฎถ-้ชŒ่ฏ]
595
+ ๅฟซ้€ŸๅฎกๆŸฅ๏ผšๅ‡†็กฎๆ€ง/้€ป่พ‘ๆ€ง/ๅฎž็”จๆ€ง
596
+ ไป…3ไธชๆ”น่ฟ›็‚น
597
+ ๆฏไธชๆœ€ๅคš2ๅฅ""",
598
+
599
+ AgentRole.FINALIZER: """[ๆœ€็ปˆๆ•ดๅˆ]
600
+ ็ปผๅˆๆ‰€ๆœ‰ๆ„่งโ†’ๆœ€ไฝณ็ญ”ๆกˆ
601
+ ๆธ…ๆ™ฐ็ป“ๆž„+ๅฎž็”จไฟกๆฏ+ๅˆ›ๆ„ๅนณ่กก
602
+ ็›ดๆŽฅไปŽๆ ธๅฟƒๅ†…ๅฎนๅผ€ๅง‹ใ€‚ๆ— ้œ€ไธๅฟ…่ฆ็š„ๆ ‡้ข˜ๆˆ–ๆ ‡่ฎฐใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
603
+ }
604
  }
605
+
606
+ return prompts.get(lang, prompts['en'])
607
 
608
  async def parallel_process_agents(
609
  self,
610
  query: str,
611
  search_results: List[Dict],
612
+ show_progress: bool = True,
613
+ lang: str = None
614
  ) -> AsyncGenerator[Tuple[str, str], None]:
615
+ """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (์บ์‹ฑ ์—†์Œ)"""
616
 
617
  start_time = time.time()
618
+
619
+ # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€
620
+ if lang is None:
621
+ lang = self.language_detector.detect_language(query)
622
+
623
+ # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
624
+ self.compact_prompts = self._init_compact_prompts(lang)
625
+
626
  search_context = self._format_search_results(search_results)
627
  accumulated_response = ""
628
  agent_thoughts = ""
629
 
 
 
 
 
 
 
630
  # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
631
+ reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang)
632
 
633
  try:
634
  # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
635
  if show_progress:
636
+ progress_msg = {
637
+ 'ko': "๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n",
638
+ 'en': "๐Ÿš€ Starting parallel processing\n๐Ÿ‘” Supervisor analysis + ๐Ÿ” Additional search in progress...\n\n",
639
+ 'ja': "๐Ÿš€ ไธฆๅˆ—ๅ‡ฆ็†้–‹ๅง‹\n๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž + ๐Ÿ” ่ฟฝๅŠ ๆคœ็ดขๅŒๆ™‚้€ฒ่กŒไธญ...\n\n",
640
+ 'zh': "๐Ÿš€ ๅผ€ๅง‹ๅนถ่กŒๅค„็†\n๐Ÿ‘” ไธป็ฎกๅˆ†ๆž + ๐Ÿ” ้™„ๅŠ ๆœ็ดขๅŒๆ—ถ่ฟ›่กŒ...\n\n"
641
+ }
642
+ agent_thoughts = progress_msg.get(lang, progress_msg['en'])
643
  yield accumulated_response, agent_thoughts
644
 
645
+ # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
646
+ supervisor_prompt_templates = {
647
+ 'ko': f"""
648
  ์งˆ๋ฌธ: {query}
649
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
650
  ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
651
+ ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ""",
652
+ 'en': f"""
653
+ Question: {query}
654
+ Search results: {search_context}
655
+ Reasoning pattern: {reasoning_pattern}
656
+ Immediately provide 5 key structures""",
657
+ 'ja': f"""
658
+ ่ณชๅ•: {query}
659
+ ๆคœ็ดข็ตๆžœ: {search_context}
660
+ ๆŽจ่ซ–ใƒ‘ใ‚ฟใƒผใƒณ: {reasoning_pattern}
661
+ ๅณๅบงใซ5ใคใฎๆ ธๅฟƒๆง‹้€ ใ‚’ๆ็คบ""",
662
+ 'zh': f"""
663
+ ้—ฎ้ข˜: {query}
664
+ ๆœ็ดข็ป“ๆžœ: {search_context}
665
+ ๆŽจ็†ๆจกๅผ: {reasoning_pattern}
666
+ ็ซ‹ๅณๆไพ›5ไธชๆ ธๅฟƒ็ป“ๆž„"""
667
+ }
668
+
669
+ supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en'])
670
 
671
  supervisor_response = ""
672
  supervisor_task = self.llm.chat_stream_async(
 
682
  async for chunk in self.streaming.buffer_and_yield(supervisor_task):
683
  supervisor_response += chunk
684
  if show_progress and len(supervisor_response) < 300:
685
+ supervisor_label = {
686
+ 'ko': "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„",
687
+ 'en': "๐Ÿ‘” Supervisor Analysis",
688
+ 'ja': "๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž",
689
+ 'zh': "๐Ÿ‘” ไธป็ฎกๅˆ†ๆž"
690
+ }
691
+ agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n"
692
  yield accumulated_response, agent_thoughts
693
 
694
  # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
695
  if show_progress:
696
+ creative_msg = {
697
+ 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n",
698
+ 'en': "๐ŸŽจ Creative Generator + ๐Ÿ” Critic preparing...\n\n",
699
+ 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€… + ๐Ÿ” ๆ‰น่ฉ•่€…ๆบ–ๅ‚™ไธญ...\n\n",
700
+ 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ + ๐Ÿ” ่ฏ„่ฎบๅฎถๅ‡†ๅค‡ไธญ...\n\n"
701
+ }
702
+ agent_thoughts += creative_msg.get(lang, creative_msg['en'])
703
  yield accumulated_response, agent_thoughts
704
 
705
+ # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
706
+ creative_prompt_templates = {
707
+ 'ko': f"""
708
  ์งˆ๋ฌธ: {query}
709
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
710
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
711
+ ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ""",
712
+ 'en': f"""
713
+ Question: {query}
714
+ Supervisor structure: {supervisor_response}
715
+ Search results: {search_context}
716
+ Generate creative+practical answer immediately""",
717
+ 'ja': f"""
718
+ ่ณชๅ•: {query}
719
+ ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
720
+ ๆคœ็ดข็ตๆžœ: {search_context}
721
+ ๅ‰ต้€ ็š„+ๅฎŸ็”จ็š„ๅ›ž็ญ”ๅณๅบง็”Ÿๆˆ""",
722
+ 'zh': f"""
723
+ ้—ฎ้ข˜: {query}
724
+ ไธป็ฎก็ป“ๆž„: {supervisor_response}
725
+ ๆœ็ดข็ป“ๆžœ: {search_context}
726
+ ็ซ‹ๅณ็”Ÿๆˆๅˆ›ๆ„+ๅฎž็”จ็ญ”ๆกˆ"""
727
+ }
728
+
729
+ creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en'])
730
 
731
  creative_response = ""
732
+ creative_partial = ""
733
  critic_started = False
734
  critic_response = ""
735
 
 
751
  if len(creative_partial) > 500 and not critic_started:
752
  critic_started = True
753
 
754
+ # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
755
+ critic_prompt_templates = {
756
+ 'ko': f"""
757
  ์›๋ณธ์งˆ๋ฌธ: {query}
758
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
759
+ ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ""",
760
+ 'en': f"""
761
+ Original question: {query}
762
+ Creative answer (partial): {creative_partial}
763
+ Quick reviewโ†’3 improvements""",
764
+ 'ja': f"""
765
+ ๅ…ƒใฎ่ณชๅ•: {query}
766
+ ๅ‰ต้€ ็š„ๅ›ž็ญ”๏ผˆไธ€้ƒจ๏ผ‰: {creative_partial}
767
+ ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผโ†’ๆ”นๅ–„็‚น3ใค""",
768
+ 'zh': f"""
769
+ ๅŽŸๅง‹้—ฎ้ข˜: {query}
770
+ ๅˆ›ๆ„็ญ”ๆกˆ๏ผˆ้ƒจๅˆ†๏ผ‰: {creative_partial}
771
+ ๅฟซ้€ŸๅฎกๆŸฅโ†’3ไธชๆ”น่ฟ›็‚น"""
772
+ }
773
+
774
+ critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en'])
775
 
776
  critic_task = asyncio.create_task(
777
  self._run_critic_async(critic_prompt)
 
779
 
780
  if show_progress:
781
  display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
782
+ creative_label = {
783
+ 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž",
784
+ 'en': "๐ŸŽจ Creative Generator",
785
+ 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…",
786
+ 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ"
787
+ }
788
+ agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n"
789
  yield accumulated_response, agent_thoughts
790
 
791
  # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
 
793
  critic_response = await critic_task
794
 
795
  if show_progress:
796
+ critic_label = {
797
+ 'ko': "๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† ",
798
+ 'en': "๐Ÿ” Critic Review",
799
+ 'ja': "๐Ÿ” ๆ‰น่ฉ•่€…ใƒฌใƒ“ใƒฅใƒผ",
800
+ 'zh': "๐Ÿ” ่ฏ„่ฎบๅฎถๅฎกๆŸฅ"
801
+ }
802
+ agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n"
803
  yield accumulated_response, agent_thoughts
804
 
805
  # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
806
  quality_score, need_more = self.quality_checker.evaluate_response(
807
+ creative_response, query, lang
808
  )
809
 
810
  if not need_more and quality_score > 0.85:
811
  # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
812
+ accumulated_response = self.response_cleaner.clean_response(creative_response)
813
 
814
  if show_progress:
815
+ quality_msg = {
816
+ 'ko': f"โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n",
817
+ 'en': f"โœ… Quality met (score: {quality_score:.2f})\nEarly completion!\n",
818
+ 'ja': f"โœ… ๅ“่ณชๆบ€่ถณ (ใ‚นใ‚ณใ‚ข: {quality_score:.2f})\nๆ—ฉๆœŸๅฎŒไบ†!\n",
819
+ 'zh': f"โœ… ่ดจ้‡ๆปก่ถณ (ๅˆ†ๆ•ฐ: {quality_score:.2f})\nๆๅ‰ๅฎŒๆˆ!\n"
820
+ }
821
+ agent_thoughts += quality_msg.get(lang, quality_msg['en'])
822
 
823
  yield accumulated_response, agent_thoughts
824
  return
825
 
826
  # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
827
  if show_progress:
828
+ final_msg = {
829
+ 'ko': "โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n",
830
+ 'en': "โœ… Final integration in progress...\n\n",
831
+ 'ja': "โœ… ๆœ€็ต‚็ตฑๅˆไธญ...\n\n",
832
+ 'zh': "โœ… ๆœ€็ปˆๆ•ดๅˆไธญ...\n\n"
833
+ }
834
+ agent_thoughts += final_msg.get(lang, final_msg['en'])
835
  yield accumulated_response, agent_thoughts
836
 
837
+ # ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
838
+ final_prompt_templates = {
839
+ 'ko': f"""
840
  ์งˆ๋ฌธ: {query}
841
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
842
  ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
843
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
844
+ ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€.""",
845
+ 'en': f"""
846
+ Question: {query}
847
+ Creative answer: {creative_response}
848
+ Critic feedback: {critic_response}
849
+ Supervisor structure: {supervisor_response}
850
+ Final integrationโ†’perfect answer. No markdown headers (#, ##, ###).""",
851
+ 'ja': f"""
852
+ ่ณชๅ•: {query}
853
+ ๅ‰ต้€ ็š„ๅ›ž็ญ”: {creative_response}
854
+ ๆ‰น่ฉ•ใƒ•ใ‚ฃใƒผใƒ‰ใƒใƒƒใ‚ฏ: {critic_response}
855
+ ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
856
+ ๆœ€็ต‚็ตฑๅˆโ†’ๅฎŒ็’งใชๅ›ž็ญ”ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚""",
857
+ 'zh': f"""
858
+ ้—ฎ้ข˜: {query}
859
+ ๅˆ›ๆ„็ญ”ๆกˆ: {creative_response}
860
+ ่ฏ„่ฎบๅ้ฆˆ: {critic_response}
861
+ ไธป็ฎก็ป“ๆž„: {supervisor_response}
862
+ ๆœ€็ปˆๆ•ดๅˆโ†’ๅฎŒ็พŽ็ญ”ๆกˆใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
863
+ }
864
+
865
+ final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en'])
866
 
867
  final_task = self.llm.chat_stream_async(
868
  messages=[
 
875
 
876
  # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
877
  accumulated_response = ""
878
+
879
+ async for chunk in final_task:
880
  accumulated_response += chunk
881
+ # ์‹ค์‹œ๊ฐ„ ์ •๋ฆฌ
882
+ cleaned_response = self.response_cleaner.clean_response(accumulated_response)
883
+ yield cleaned_response, agent_thoughts
884
 
885
+ # ์ตœ์ข… ์ •๋ฆฌ
886
+ accumulated_response = self.response_cleaner.clean_response(accumulated_response)
 
887
 
888
+ # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€ (์–ธ์–ด๋ณ„)
889
+ processing_time = time.time() - start_time
890
+ time_msg = {
891
+ 'ko': f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ",
892
+ 'en': f"\n\n---\nโšก Processing time: {processing_time:.1f} seconds",
893
+ 'ja': f"\n\n---\nโšก ๅ‡ฆ็†ๆ™‚้–“: {processing_time:.1f}็ง’",
894
+ 'zh': f"\n\n---\nโšก ๅค„็†ๆ—ถ้—ด: {processing_time:.1f}็ง’"
895
+ }
896
+ accumulated_response += time_msg.get(lang, time_msg['en'])
897
 
898
  yield accumulated_response, agent_thoughts
899
 
900
  except Exception as e:
901
+ error_msg = {
902
+ 'ko': f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
903
+ 'en': f"โŒ Error occurred: {str(e)}",
904
+ 'ja': f"โŒ ใ‚จใƒฉใƒผ็™บ็”Ÿ: {str(e)}",
905
+ 'zh': f"โŒ ๅ‘็”Ÿ้”™่ฏฏ: {str(e)}"
906
+ }
907
+ yield error_msg.get(lang, error_msg['en']), agent_thoughts
908
 
909
  async def _run_critic_async(self, prompt: str) -> str:
910
+ """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰ with error handling"""
911
  try:
912
  response = ""
913
  async for chunk in self.llm.chat_stream_async(
 
920
  ):
921
  response += chunk
922
  return response
923
+ except Exception as e:
924
+ # ์–ธ์–ด ๊ฐ์ง€
925
+ lang = 'ko' if '์งˆ๋ฌธ' in prompt else 'en'
926
+ error_msg = {
927
+ 'ko': "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜",
928
+ 'en': "Error during critic processing",
929
+ 'ja': "ๆ‰น่ฉ•ๅ‡ฆ็†ไธญใฎใ‚จใƒฉใƒผ",
930
+ 'zh': "่ฏ„่ฎบๅค„็†ไธญๅ‡บ้”™"
931
+ }
932
+ return error_msg.get(lang, error_msg['en'])
933
 
934
  def _format_search_results(self, results: List[Dict]) -> str:
935
  """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
936
  if not results:
937
+ return "No search results"
938
 
939
  formatted = []
940
+ for i, r in enumerate(results[:3], 1):
941
+ title = r.get('title', '')[:50]
942
+ desc = r.get('description', '')[:100]
943
+ formatted.append(f"[{i}]{title}:{desc}")
944
 
945
  return " | ".join(formatted)
946
 
947
 
948
  # ============================================================================
949
+ # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „ - ์บ์‹ฑ ์ œ๊ฑฐ)
950
  # ============================================================================
951
 
952
  def create_optimized_gradio_interface():
953
+ """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค (์บ์‹ฑ ์—†์Œ)"""
954
 
955
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
956
  system = SpeedOptimizedMultiAgentSystem()
 
960
  history: List[Dict],
961
  use_search: bool,
962
  show_agent_thoughts: bool,
963
+ search_count: int,
964
+ language_mode: str
965
  ):
966
+ """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„์ „"""
967
 
968
  if not message:
969
  yield history, "", ""
970
  return
971
 
972
+ # ์–ธ์–ด ์„ค์ •
973
+ if language_mode == "Auto":
974
+ lang = None # ์ž๋™ ๊ฐ์ง€
975
+ else:
976
+ lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"}
977
+ lang = lang_map.get(language_mode, None)
978
+
979
  # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
980
  try:
981
  import nest_asyncio
982
  nest_asyncio.apply()
983
  except ImportError:
984
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
 
986
  try:
987
  # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
988
  search_results = []
989
  search_display = ""
990
 
991
+ # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€ (ํ•„์š”ํ•œ ๊ฒฝ์šฐ)
992
+ detected_lang = lang or system.language_detector.detect_language(message)
993
+
994
  if use_search:
995
  # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
996
+ processing_msg = {
997
+ 'ko': "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘...",
998
+ 'en': "โšก High-speed processing...",
999
+ 'ja': "โšก ้ซ˜้€Ÿๅ‡ฆ็†ไธญ...",
1000
+ 'zh': "โšก ้ซ˜้€Ÿๅค„็†ไธญ..."
1001
+ }
1002
  history_with_message = history + [
1003
  {"role": "user", "content": message},
1004
+ {"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])}
1005
  ]
1006
  yield history_with_message, "", ""
1007
 
1008
  # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
1009
+ async def search_wrapper():
1010
+ return await system.search.search_async(message, count=search_count, lang=detected_lang)
1011
+
1012
+ loop = asyncio.new_event_loop()
1013
+ asyncio.set_event_loop(loop)
1014
+ search_results = loop.run_until_complete(search_wrapper())
1015
 
1016
  if search_results:
1017
+ ref_label = {
1018
+ 'ko': "๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ",
1019
+ 'en': "๐Ÿ“š References",
1020
+ 'ja': "๐Ÿ“š ๅ‚่€ƒ่ณ‡ๆ–™",
1021
+ 'zh': "๐Ÿ“š ๅ‚๏ฟฝ๏ฟฝ๏ฟฝ่ต„ๆ–™"
1022
+ }
1023
+ search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n"
1024
  for i, result in enumerate(search_results[:3], 1):
1025
  search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
1026
  search_display += f" {result['description'][:100]}...\n\n"
 
1028
  # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
1029
  current_history = history + [{"role": "user", "content": message}]
1030
 
1031
+ # ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ์„ ์œ„ํ•œ ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ
1032
+ async def stream_responses():
1033
+ """์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ"""
1034
  async for response, thoughts in system.parallel_process_agents(
1035
  query=message,
1036
  search_results=search_results,
1037
+ show_progress=show_agent_thoughts,
1038
+ lang=detected_lang
1039
  ):
1040
+ yield response, thoughts
 
1041
 
1042
+ # ์ƒˆ ์ด๋ฒคํŠธ ๋ฃจํ”„์—์„œ ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ
1043
+ loop = asyncio.new_event_loop()
1044
+ asyncio.set_event_loop(loop)
1045
 
1046
+ # ๋น„๋™๊ธฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์ˆœํšŒ
1047
+ gen = stream_responses()
1048
+
1049
+ while True:
1050
+ try:
1051
+ # ๋‹ค์Œ ํ•ญ๋ชฉ ๊ฐ€์ ธ์˜ค๊ธฐ
1052
+ task = asyncio.ensure_future(gen.__anext__(), loop=loop)
1053
+ response, thoughts = loop.run_until_complete(task)
1054
+
1055
+ # ์‹ค์‹œ๊ฐ„ ์—…๋ฐ์ดํŠธ
1056
+ updated_history = current_history + [
1057
+ {"role": "assistant", "content": response}
1058
+ ]
1059
+ yield updated_history, thoughts, search_display
1060
+
1061
+ except StopAsyncIteration:
1062
+ break
1063
 
1064
  except Exception as e:
1065
  error_history = history + [
1066
  {"role": "user", "content": message},
1067
+ {"role": "assistant", "content": f"โŒ Error: {str(e)}"}
1068
  ]
1069
  yield error_history, "", ""
1070
+ finally:
1071
+ # ๋ฃจํ”„ ์ •๋ฆฌ
1072
+ try:
1073
+ loop.close()
1074
+ except:
1075
+ pass
1076
 
1077
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
1078
  with gr.Blocks(
1079
+ title="โšก Speed-Optimized Multi-Agent System (No Cache)",
1080
  theme=gr.themes.Soft(),
1081
  css="""
1082
  .gradio-container {
 
1086
  """
1087
  ) as demo:
1088
  gr.Markdown("""
1089
+ # โšก Enhanced Multi-Agent RAG System (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
1090
+ **Complex questions processed within 5-8 seconds | Multi-language support**
1091
+
1092
+ **Optimization Features:**
1093
+ - ๐Ÿš€ Parallel Processing: Concurrent agent execution
1094
+ - โšก Stream Buffering: Network optimization
1095
+ - ๐ŸŽฏ Early Termination: Complete immediately when quality is met
1096
+ - ๐ŸŒ Multi-language: Auto-detect Korean/English/Japanese/Chinese
1097
+ - โŒ **Caching Disabled**: ์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ
1098
  """)
1099
 
1100
  with gr.Row():
1101
  with gr.Column(scale=3):
1102
  chatbot = gr.Chatbot(
1103
  height=500,
1104
+ label="๐Ÿ’ฌ Chat",
1105
  type="messages"
1106
  )
1107
 
1108
  msg = gr.Textbox(
1109
+ label="Enter complex question",
1110
+ placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...",
1111
  lines=3
1112
  )
1113
 
1114
  with gr.Row():
1115
+ submit = gr.Button("โšก High-Speed Process", variant="primary")
1116
+ clear = gr.Button("๐Ÿ”„ Reset")
1117
 
1118
+ with gr.Accordion("๐Ÿค– Agent Processing", open=False):
1119
  agent_thoughts = gr.Markdown()
1120
 
1121
+ with gr.Accordion("๐Ÿ“š Search Sources", open=False):
1122
  search_sources = gr.Markdown()
1123
 
1124
  with gr.Column(scale=1):
1125
+ gr.Markdown("**โš™๏ธ Settings**")
1126
+
1127
+ language_mode = gr.Radio(
1128
+ choices=["Auto", "Korean", "English", "Japanese", "Chinese"],
1129
+ value="Auto",
1130
+ label="๐ŸŒ Language Mode"
1131
+ )
1132
 
1133
  use_search = gr.Checkbox(
1134
+ label="๐Ÿ” Use Web Search",
1135
  value=True
1136
  )
1137
 
1138
  show_agent_thoughts = gr.Checkbox(
1139
+ label="๐Ÿง  Show Processing",
1140
  value=True
1141
  )
1142
 
 
1145
  maximum=10,
1146
  value=5,
1147
  step=1,
1148
+ label="Search Results Count"
1149
  )
1150
 
1151
  gr.Markdown("""
1152
+ **โšก Optimization Status**
1153
+
1154
+ **Active Optimizations:**
1155
+ - โœ… Parallel Processing
1156
+ - โŒ ~~Smart Caching~~ (์ œ๊ฑฐ๋จ)
1157
+ - โœ… Buffer Streaming
1158
+ - โœ… Early Termination
1159
+ - โœ… Compressed Prompts
1160
+ - โœ… Multi-language Support
1161
+ - โœ… Error Recovery
1162
+
1163
+ **Expected Processing Time:**
1164
+ - Simple Query: 3-5 seconds
1165
+ - Complex Query: 5-8 seconds
1166
+ - Very Complex: 8-12 seconds
1167
  """)
1168
 
1169
+ # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ (๋‹ค๊ตญ์–ด)
1170
  gr.Examples(
1171
  examples=[
1172
+ # Korean
1173
  "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
1174
  "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
1175
+ # English
1176
+ "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
1177
+ "Design 5 innovative business models for climate change mitigation with practical implementation details",
1178
+ # Japanese
1179
+ "ใƒกใ‚ฟใƒใƒผใ‚นๆ™‚ไปฃใฎๆ•™่‚ฒ้ฉๆ–ฐๆ–นๆกˆใ‚’ๅฎŸ่ฃ…ๅฏ่ƒฝใชใƒฌใƒ™ใƒซใงๆๆกˆใ—ใฆใใ ใ•ใ„",
1180
+ # Chinese
1181
+ "ๅˆ†ๆžไบบๅทฅๆ™บ่ƒฝๅฏนๆœชๆฅๅๅนดๅ…จ็ƒ็ปๆตŽ็š„ๅฝฑๅ“ๅนถๆๅ‡บๅบ”ๅฏน็ญ–็•ฅ"
1182
  ],
1183
  inputs=msg
1184
  )
 
1186
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
1187
  submit.click(
1188
  process_query_optimized,
1189
+ inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1190
  outputs=[chatbot, agent_thoughts, search_sources]
1191
  ).then(
1192
  lambda: "",
 
1196
 
1197
  msg.submit(
1198
  process_query_optimized,
1199
+ inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1200
  outputs=[chatbot, agent_thoughts, search_sources]
1201
  ).then(
1202
  lambda: "",
 
1220
  if __name__ == "__main__":
1221
  print("""
1222
  โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
1223
+ โ•‘ โšก Speed-Optimized Multi-Agent System (No Cache) โšก โ•‘
1224
  โ•‘ โ•‘
1225
+ โ•‘ High-speed AI system processing complex questions โ•‘
1226
  โ•‘ โ•‘
1227
+ โ•‘ Features: โ•‘
1228
+ โ•‘ โ€ข Multi-language support (KO/EN/JA/ZH) โ•‘
1229
+ โ•‘ โ€ข Improved error recovery โ•‘
1230
+ โ•‘ โ€ข NO CACHING (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ) โ•‘
1231
+ โ•‘ โ€ข Adaptive stream buffering โ•‘
1232
+ โ•‘ โ€ข Response cleaning & formatting โ•‘
1233
  โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
1234
  """)
1235
 
1236
  # API ํ‚ค ํ™•์ธ
1237
  if not os.getenv("FIREWORKS_API_KEY"):
1238
+ print("\nโš ๏ธ FIREWORKS_API_KEY is not set.")
1239
 
1240
  if not os.getenv("BRAVE_SEARCH_API_KEY"):
1241
+ print("\nโš ๏ธ BRAVE_SEARCH_API_KEY is not set.")
1242
 
1243
  # Gradio ์•ฑ ์‹คํ–‰
1244
  demo = create_optimized_gradio_interface()
 
1246
  is_hf_spaces = os.getenv("SPACE_ID") is not None
1247
 
1248
  if is_hf_spaces:
1249
+ print("\n๐Ÿค— Running in optimized mode on Hugging Face Spaces (No Cache)...")
1250
  demo.launch(server_name="0.0.0.0", server_port=7860)
1251
  else:
1252
+ print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
1253
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)