aiqtech commited on
Commit
5576ce9
ยท
verified ยท
1 Parent(s): eddae4d

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +689 -330
app-backup.py CHANGED
@@ -1,6 +1,8 @@
1
  """
2
  โšก Speed-Optimized Multi-Agent RAG System for Complex Questions
3
- ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ, ์Šค๋งˆํŠธ ์บ์‹ฑ, ๋™์  ํŒŒ์ดํ”„๋ผ์ธ์œผ๋กœ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ ๋น ๋ฅด๊ฒŒ ์ฒ˜๋ฆฌ
 
 
4
  """
5
 
6
  import os
@@ -8,6 +10,8 @@ import json
8
  import time
9
  import asyncio
10
  import hashlib
 
 
11
  from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
12
  from datetime import datetime, timedelta
13
  from enum import Enum
@@ -59,105 +63,60 @@ class AgentResponse(BaseModel):
59
 
60
 
61
  # ============================================================================
62
- # ์Šค๋งˆํŠธ ์บ์‹ฑ ์‹œ์Šคํ…œ
63
  # ============================================================================
64
 
65
- class SmartCache:
66
- """์ง€๋Šฅํ˜• ์บ์‹ฑ ์‹œ์Šคํ…œ"""
67
 
68
- def __init__(self, max_size: int = 100, ttl_hours: int = 24):
69
- self.cache = {}
70
- self.access_count = {}
71
- self.timestamps = {}
72
- self.max_size = max_size
73
- self.ttl = timedelta(hours=ttl_hours)
74
- self.reasoning_patterns = self._init_reasoning_patterns()
75
-
76
- def _init_reasoning_patterns(self) -> Dict:
77
- """์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ์ถ”๋ก  ํŒจํ„ด ์ดˆ๊ธฐํ™”"""
78
- return {
79
- "analysis": {
80
- "structure": ["ํ˜„ํ™ฉ ๋ถ„์„", "ํ•ต์‹ฌ ์š”์ธ", "์˜ํ–ฅ ํ‰๊ฐ€", "์ „๋žต ์ œ์•ˆ"],
81
- "keywords": ["๋ถ„์„", "ํ‰๊ฐ€", "์˜ํ–ฅ", "์ „๋žต"]
82
- },
83
- "comparison": {
84
- "structure": ["๋Œ€์ƒ ์ •์˜", "๋น„๊ต ๊ธฐ์ค€", "์žฅ๋‹จ์  ๋ถ„์„", "๊ฒฐ๋ก "],
85
- "keywords": ["๋น„๊ต", "์ฐจ์ด", "์žฅ๋‹จ์ ", "vs"]
86
- },
87
- "creative": {
88
- "structure": ["๋ฌธ์ œ ์ •์˜", "์ฐฝ์˜์  ์ ‘๊ทผ", "๊ตฌํ˜„ ๋ฐฉ๋ฒ•", "์˜ˆ์ƒ ํšจ๊ณผ"],
89
- "keywords": ["์ฐฝ์˜์ ", "ํ˜์‹ ์ ", "์ƒˆ๋กœ์šด", "์•„์ด๋””์–ด"]
90
- },
91
- "technical": {
92
- "structure": ["๊ธฐ์ˆ  ๊ฐœ์š”", "ํ•ต์‹ฌ ์›๋ฆฌ", "๊ตฌํ˜„ ์ƒ์„ธ", "์‹ค์šฉ ์˜ˆ์‹œ"],
93
- "keywords": ["๊ธฐ์ˆ ", "๊ตฌํ˜„", "์ฝ”๋“œ", "์‹œ์Šคํ…œ"]
94
- }
95
- }
96
-
97
- def get_query_hash(self, query: str) -> str:
98
- """์ฟผ๋ฆฌ ํ•ด์‹œ ์ƒ์„ฑ"""
99
- return hashlib.md5(query.encode()).hexdigest()
100
-
101
- def get(self, query: str) -> Optional[Dict]:
102
- """์บ์‹œ์—์„œ ์กฐํšŒ"""
103
- query_hash = self.get_query_hash(query)
104
-
105
- if query_hash in self.cache:
106
- # TTL ์ฒดํฌ
107
- if datetime.now() - self.timestamps[query_hash] < self.ttl:
108
- self.access_count[query_hash] += 1
109
- return self.cache[query_hash]
110
- else:
111
- # ๋งŒ๋ฃŒ๋œ ์บ์‹œ ์‚ญ์ œ
112
- del self.cache[query_hash]
113
- del self.timestamps[query_hash]
114
- del self.access_count[query_hash]
115
-
116
- return None
117
-
118
- def set(self, query: str, response: Dict):
119
- """์บ์‹œ์— ์ €์žฅ"""
120
- query_hash = self.get_query_hash(query)
121
-
122
- # ์บ์‹œ ํฌ๊ธฐ ๊ด€๋ฆฌ
123
- if len(self.cache) >= self.max_size:
124
- # LRU ์ •์ฑ…: ๊ฐ€์žฅ ์ ๊ฒŒ ์‚ฌ์šฉ๋œ ํ•ญ๋ชฉ ์ œ๊ฑฐ
125
- least_used = min(self.access_count, key=self.access_count.get)
126
- del self.cache[least_used]
127
- del self.timestamps[least_used]
128
- del self.access_count[least_used]
129
-
130
- self.cache[query_hash] = response
131
- self.timestamps[query_hash] = datetime.now()
132
- self.access_count[query_hash] = 1
133
-
134
- def get_reasoning_pattern(self, query: str) -> Optional[Dict]:
135
- """์ฟผ๋ฆฌ์— ์ ํ•ฉํ•œ ์ถ”๋ก  ํŒจํ„ด ๋ฐ˜ํ™˜"""
136
- query_lower = query.lower()
137
 
138
- for pattern_type, pattern_data in self.reasoning_patterns.items():
139
- if any(keyword in query_lower for keyword in pattern_data["keywords"]):
140
- return {
141
- "type": pattern_type,
142
- "structure": pattern_data["structure"]
143
- }
 
 
 
 
 
144
 
145
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  # ============================================================================
149
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์ตœ์ ํ™” Brave Search
150
  # ============================================================================
151
 
152
  class AsyncBraveSearch:
153
- """๋น„๋™๊ธฐ Brave ๊ฒ€์ƒ‰ ํด๋ผ์ด์–ธํŠธ"""
154
 
155
  def __init__(self, api_key: Optional[str] = None):
156
  self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY")
157
  self.base_url = "https://api.search.brave.com/res/v1/web/search"
 
158
 
159
- async def search_async(self, query: str, count: int = 5) -> List[Dict]:
160
- """๋น„๋™๊ธฐ ๊ฒ€์ƒ‰"""
161
  if not self.api_key:
162
  return []
163
 
@@ -166,48 +125,70 @@ class AsyncBraveSearch:
166
  "X-Subscription-Token": self.api_key
167
  }
168
 
 
 
 
 
 
 
 
 
169
  params = {
170
  "q": query,
171
  "count": count,
172
  "text_decorations": False,
173
- "search_lang": "ko",
174
- "country": "KR"
175
  }
176
 
177
- try:
178
- async with aiohttp.ClientSession() as session:
179
- async with session.get(
180
- self.base_url,
181
- headers=headers,
182
- params=params,
183
- timeout=aiohttp.ClientTimeout(total=5)
184
- ) as response:
185
- if response.status == 200:
186
- data = await response.json()
187
-
188
- results = []
189
- if "web" in data and "results" in data["web"]:
190
- for item in data["web"]["results"][:count]:
191
- results.append({
192
- "title": item.get("title", ""),
193
- "url": item.get("url", ""),
194
- "description": item.get("description", ""),
195
- "age": item.get("age", "")
196
- })
197
-
198
- return results
199
- except:
200
- return []
 
 
 
 
 
 
 
 
201
 
202
  return []
 
 
 
 
 
 
 
203
 
204
 
205
  # ============================================================================
206
- # ์ตœ์ ํ™”๋œ Fireworks ํด๋ผ์ด์–ธํŠธ
207
  # ============================================================================
208
 
209
  class OptimizedFireworksClient:
210
- """์ตœ์ ํ™”๋œ LLM ํด๋ผ์ด์–ธํŠธ"""
211
 
212
  def __init__(self, api_key: Optional[str] = None):
213
  self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
@@ -223,13 +204,32 @@ class OptimizedFireworksClient:
223
 
224
  # ํ•ญ์ƒ ์ตœ๊ณ  ์„ฑ๋Šฅ ๋ชจ๋ธ ์‚ฌ์šฉ (๋ณต์žกํ•œ ์งˆ๋ฌธ ์ „์ œ)
225
  self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  async def chat_stream_async(
228
  self,
229
  messages: List[Dict],
230
  **kwargs
231
  ) -> AsyncGenerator[str, None]:
232
- """๋น„๋™๊ธฐ ์ŠคํŠธ๋ฆฌ๋ฐ ๋Œ€ํ™”"""
233
 
234
  payload = {
235
  "model": self.model,
@@ -241,34 +241,43 @@ class OptimizedFireworksClient:
241
  "stream": True
242
  }
243
 
244
- try:
245
- async with aiohttp.ClientSession() as session:
246
- async with session.post(
247
- self.base_url,
248
- headers={**self.headers, "Accept": "text/event-stream"},
249
- json=payload,
250
- timeout=aiohttp.ClientTimeout(total=30)
251
- ) as response:
252
- async for line in response.content:
253
- line_str = line.decode('utf-8').strip()
254
- if line_str.startswith("data: "):
255
- data_str = line_str[6:]
256
- if data_str == "[DONE]":
257
- break
258
- try:
259
- data = json.loads(data_str)
260
- if "choices" in data and len(data["choices"]) > 0:
261
- delta = data["choices"][0].get("delta", {})
262
- if "content" in delta:
263
- yield delta["content"]
264
- except json.JSONDecodeError:
265
- continue
266
- except Exception as e:
267
- yield f"์˜ค๋ฅ˜: {str(e)}"
 
 
 
 
 
 
 
 
 
268
 
269
 
270
  # ============================================================================
271
- # ๊ฒฝ๋Ÿ‰ํ™”๋œ ์ถ”๋ก  ์ฒด์ธ
272
  # ============================================================================
273
 
274
  class LightweightReasoningChain:
@@ -276,28 +285,70 @@ class LightweightReasoningChain:
276
 
277
  def __init__(self):
278
  self.templates = {
279
- "problem_solving": {
280
- "steps": ["๋ฌธ์ œ ๋ถ„ํ•ด", "ํ•ต์‹ฌ ์š”์ธ", "ํ•ด๊ฒฐ ๋ฐฉ์•ˆ", "๊ตฌํ˜„ ์ „๋žต"],
281
- "prompt": "์ฒด๊ณ„์ ์œผ๋กœ ๋‹จ๊ณ„๋ณ„๋กœ ๋ถ„์„ํ•˜๊ณ  ํ•ด๊ฒฐ์ฑ…์„ ์ œ์‹œํ•˜์„ธ์š”."
282
- },
283
- "creative_thinking": {
284
- "steps": ["๊ธฐ์กด ์ ‘๊ทผ", "์ฐฝ์˜์  ๋Œ€์•ˆ", "ํ˜์‹  ํฌ์ธํŠธ", "์‹คํ–‰ ๋ฐฉ๋ฒ•"],
285
- "prompt": "๊ธฐ์กด ๋ฐฉ์‹์„ ๋„˜์–ด์„  ์ฐฝ์˜์ ์ด๊ณ  ํ˜์‹ ์ ์ธ ์ ‘๊ทผ์„ ์ œ์‹œํ•˜์„ธ์š”."
 
 
 
 
 
 
286
  },
287
- "critical_analysis": {
288
- "steps": ["ํ˜„ํ™ฉ ํ‰๊ฐ€", "๊ฐ•์ /์•ฝ์ ", "๊ธฐํšŒ/์œ„ํ˜‘", "๊ฐœ์„  ๋ฐฉํ–ฅ"],
289
- "prompt": "๋น„ํŒ์  ๊ด€์ ์—์„œ ์ฒ ์ €ํžˆ ๋ถ„์„ํ•˜๊ณ  ๊ฐœ์„ ์ ์„ ๋„์ถœํ•˜์„ธ์š”."
 
 
 
 
 
 
 
 
 
 
290
  }
291
  }
292
 
293
- def get_reasoning_structure(self, query_type: str) -> Dict:
294
  """์ฟผ๋ฆฌ ์œ ํ˜•์— ๋งž๋Š” ์ถ”๋ก  ๊ตฌ์กฐ ๋ฐ˜ํ™˜"""
295
- # ๊ธฐ๋ณธ๊ฐ’์€ problem_solving
296
- return self.templates.get(query_type, self.templates["problem_solving"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
 
299
  # ============================================================================
300
- # ์กฐ๊ธฐ ์ข…๋ฃŒ ๋ฉ”์ปค๋‹ˆ์ฆ˜
301
  # ============================================================================
302
 
303
  class QualityChecker:
@@ -312,16 +363,27 @@ class QualityChecker:
312
  "clarity": 0.2
313
  }
314
 
315
- def evaluate_response(self, response: str, query: str) -> Tuple[float, bool]:
316
- """์‘๋‹ต ํ’ˆ์งˆ ํ‰๊ฐ€"""
317
  scores = {}
318
 
 
 
 
 
319
  # ๊ธธ์ด ํ‰๊ฐ€
320
- scores["length"] = min(len(response) / 1000, 1.0) # 1000์ž ๊ธฐ์ค€
321
 
322
- # ๊ตฌ์กฐ ํ‰๊ฐ€
323
- structure_markers = ["1.", "2.", "โ€ข", "-", "์ฒซ์งธ", "๋‘˜์งธ", "๊ฒฐ๋ก ", "์š”์•ฝ"]
324
- scores["structure"] = sum(1 for m in structure_markers if m in response) / len(structure_markers)
 
 
 
 
 
 
 
325
 
326
  # ์™„์ „์„ฑ ํ‰๊ฐ€ (์ฟผ๋ฆฌ ํ‚ค์›Œ๋“œ ํฌํ•จ ์—ฌ๋ถ€)
327
  query_words = set(query.split())
@@ -329,9 +391,16 @@ class QualityChecker:
329
  scores["completeness"] = len(query_words & response_words) / max(len(query_words), 1)
330
 
331
  # ๋ช…ํ™•์„ฑ ํ‰๊ฐ€ (๋ฌธ์žฅ ๊ตฌ์กฐ)
332
- sentences = response.split('.')
 
 
 
 
 
 
 
333
  avg_sentence_length = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
334
- scores["clarity"] = min(avg_sentence_length / 20, 1.0) # 20๋‹จ์–ด ๊ธฐ์ค€
335
 
336
  # ๊ฐ€์ค‘ ํ‰๊ท  ๊ณ„์‚ฐ
337
  total_score = sum(
@@ -345,29 +414,41 @@ class QualityChecker:
345
 
346
 
347
  # ============================================================================
348
- # ์ŠคํŠธ๋ฆฌ๋ฐ ์ตœ์ ํ™”
349
  # ============================================================================
350
 
351
  class OptimizedStreaming:
352
- """์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™”"""
353
 
354
- def __init__(self, chunk_size: int = 100, flush_interval: float = 0.1):
355
  self.chunk_size = chunk_size
356
  self.flush_interval = flush_interval
357
  self.buffer = ""
358
  self.last_flush = time.time()
 
359
 
360
  async def buffer_and_yield(
361
  self,
362
- stream: AsyncGenerator[str, None]
 
363
  ) -> AsyncGenerator[str, None]:
364
- """๋ฒ„ํผ๋ง๋œ ์ŠคํŠธ๋ฆฌ๋ฐ"""
365
 
 
366
  async for chunk in stream:
367
  self.buffer += chunk
368
  current_time = time.time()
 
369
 
370
- if (len(self.buffer) >= self.chunk_size or
 
 
 
 
 
 
 
 
371
  current_time - self.last_flush >= self.flush_interval):
372
 
373
  yield self.buffer
@@ -380,85 +461,212 @@ class OptimizedStreaming:
380
 
381
 
382
  # ============================================================================
383
- # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  # ============================================================================
385
 
386
  class SpeedOptimizedMultiAgentSystem:
387
- """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ"""
388
 
389
  def __init__(self):
390
  self.llm = OptimizedFireworksClient()
391
  self.search = AsyncBraveSearch()
392
- self.cache = SmartCache()
393
  self.reasoning = LightweightReasoningChain()
394
  self.quality_checker = QualityChecker()
395
  self.streaming = OptimizedStreaming()
396
-
397
- # ์ปดํŒฉํŠธ ํ”„๋กฌํ”„ํŠธ
398
- self.compact_prompts = self._init_compact_prompts()
399
 
400
  # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
401
  self.executor = ThreadPoolExecutor(max_workers=4)
402
 
403
- def _init_compact_prompts(self) -> Dict:
404
- """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ"""
405
- return {
406
- AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
 
407
  ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
408
  ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
409
  ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
410
-
411
- AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
412
  ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
413
  ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
414
  ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
415
-
416
- AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
417
  ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
418
  ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
419
  ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
420
-
421
- AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
422
  ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
423
  ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
424
- ํ•ต์‹ฌ๋จผ์ €+์ƒ์„ธ๋Š”ํ›„์ˆœ์œ„"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  }
 
 
426
 
427
  async def parallel_process_agents(
428
  self,
429
  query: str,
430
  search_results: List[Dict],
431
- show_progress: bool = True
 
432
  ) -> AsyncGenerator[Tuple[str, str], None]:
433
- """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ"""
434
 
435
  start_time = time.time()
 
 
 
 
 
 
 
 
436
  search_context = self._format_search_results(search_results)
437
  accumulated_response = ""
438
  agent_thoughts = ""
439
 
440
- # ์บ์‹œ ํ™•์ธ
441
- cached = self.cache.get(query)
442
- if cached:
443
- yield cached["response"], "โœจ ์บ์‹œ์—์„œ ์ฆ‰์‹œ ๋กœ๋“œ"
444
- return
445
-
446
  # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
447
- reasoning_pattern = self.cache.get_reasoning_pattern(query)
448
 
449
  try:
450
  # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
451
  if show_progress:
452
- agent_thoughts = "### ๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n"
453
- agent_thoughts += "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n"
 
 
 
 
 
454
  yield accumulated_response, agent_thoughts
455
 
456
- # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ
457
- supervisor_prompt = f"""
 
458
  ์งˆ๋ฌธ: {query}
459
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
460
  ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
461
- ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  supervisor_response = ""
464
  supervisor_task = self.llm.chat_stream_async(
@@ -474,23 +682,54 @@ class SpeedOptimizedMultiAgentSystem:
474
  async for chunk in self.streaming.buffer_and_yield(supervisor_task):
475
  supervisor_response += chunk
476
  if show_progress and len(supervisor_response) < 300:
477
- agent_thoughts = f"### ๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„\n{supervisor_response[:300]}...\n\n"
 
 
 
 
 
 
478
  yield accumulated_response, agent_thoughts
479
 
480
  # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
481
  if show_progress:
482
- agent_thoughts += "### ๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n"
 
 
 
 
 
 
483
  yield accumulated_response, agent_thoughts
484
 
485
- # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘
486
- creative_prompt = f"""
 
487
  ์งˆ๋ฌธ: {query}
488
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
489
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
490
- ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
  creative_response = ""
493
- creative_partial = "" # ๋น„ํ‰์ž์šฉ ๋ถ€๋ถ„ ์‘๋‹ต
494
  critic_started = False
495
  critic_response = ""
496
 
@@ -512,11 +751,27 @@ class SpeedOptimizedMultiAgentSystem:
512
  if len(creative_partial) > 500 and not critic_started:
513
  critic_started = True
514
 
515
- # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘
516
- critic_prompt = f"""
 
517
  ์›๋ณธ์งˆ๋ฌธ: {query}
518
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
519
- ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  critic_task = asyncio.create_task(
522
  self._run_critic_async(critic_prompt)
@@ -524,7 +779,13 @@ class SpeedOptimizedMultiAgentSystem:
524
 
525
  if show_progress:
526
  display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
527
- agent_thoughts = f"### ๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž\n{display_creative}\n\n"
 
 
 
 
 
 
528
  yield accumulated_response, agent_thoughts
529
 
530
  # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
@@ -532,41 +793,76 @@ class SpeedOptimizedMultiAgentSystem:
532
  critic_response = await critic_task
533
 
534
  if show_progress:
535
- agent_thoughts += f"### ๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† \n{critic_response[:200]}...\n\n"
 
 
 
 
 
 
536
  yield accumulated_response, agent_thoughts
537
 
538
  # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
539
  quality_score, need_more = self.quality_checker.evaluate_response(
540
- creative_response, query
541
  )
542
 
543
  if not need_more and quality_score > 0.85:
544
  # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
545
- accumulated_response = creative_response
546
 
547
  if show_progress:
548
- agent_thoughts += f"### โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n"
549
-
550
- # ์บ์‹œ ์ €์žฅ
551
- self.cache.set(query, {
552
- "response": accumulated_response,
553
- "timestamp": datetime.now()
554
- })
555
 
556
  yield accumulated_response, agent_thoughts
557
  return
558
 
559
  # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
560
  if show_progress:
561
- agent_thoughts += "### โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n"
 
 
 
 
 
 
562
  yield accumulated_response, agent_thoughts
563
 
564
- final_prompt = f"""
 
 
565
  ์งˆ๋ฌธ: {query}
566
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
567
  ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
568
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
569
- ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  final_task = self.llm.chat_stream_async(
572
  messages=[
@@ -579,28 +875,39 @@ class SpeedOptimizedMultiAgentSystem:
579
 
580
  # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
581
  accumulated_response = ""
582
- async for chunk in self.streaming.buffer_and_yield(final_task):
 
583
  accumulated_response += chunk
584
- yield accumulated_response, agent_thoughts
 
 
585
 
586
- # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€
587
- processing_time = time.time() - start_time
588
- accumulated_response += f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ"
589
 
590
- # ์บ์‹œ ์ €์žฅ
591
- self.cache.set(query, {
592
- "response": accumulated_response,
593
- "timestamp": datetime.now()
594
- })
 
 
 
 
595
 
596
  yield accumulated_response, agent_thoughts
597
 
598
  except Exception as e:
599
- error_msg = f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
600
- yield error_msg, agent_thoughts
 
 
 
 
 
601
 
602
  async def _run_critic_async(self, prompt: str) -> str:
603
- """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰"""
604
  try:
605
  response = ""
606
  async for chunk in self.llm.chat_stream_async(
@@ -613,27 +920,37 @@ class SpeedOptimizedMultiAgentSystem:
613
  ):
614
  response += chunk
615
  return response
616
- except:
617
- return "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜"
 
 
 
 
 
 
 
 
618
 
619
  def _format_search_results(self, results: List[Dict]) -> str:
620
  """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
621
  if not results:
622
- return "๊ฒ€์ƒ‰๊ฒฐ๊ณผ์—†์Œ"
623
 
624
  formatted = []
625
- for i, r in enumerate(results[:3], 1): # ์ƒ์œ„ 3๊ฐœ๋งŒ
626
- formatted.append(f"[{i}]{r.get('title','')[:50]}:{r.get('description','')[:100]}")
 
 
627
 
628
  return " | ".join(formatted)
629
 
630
 
631
  # ============================================================================
632
- # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „)
633
  # ============================================================================
634
 
635
  def create_optimized_gradio_interface():
636
- """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค"""
637
 
638
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
639
  system = SpeedOptimizedMultiAgentSystem()
@@ -643,57 +960,67 @@ def create_optimized_gradio_interface():
643
  history: List[Dict],
644
  use_search: bool,
645
  show_agent_thoughts: bool,
646
- search_count: int
 
647
  ):
648
- """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ๋™๊ธฐ ๋ฒ„์ „"""
649
 
650
  if not message:
651
  yield history, "", ""
652
  return
653
 
 
 
 
 
 
 
 
654
  # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
655
  try:
656
  import nest_asyncio
657
  nest_asyncio.apply()
658
  except ImportError:
659
- pass # nest_asyncio๊ฐ€ ์—†์–ด๋„ ์ง„ํ–‰
660
-
661
- def run_async_function(coro):
662
- """๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰ํ•˜๋Š” ํ—ฌํผ"""
663
- try:
664
- loop = asyncio.get_event_loop()
665
- if loop.is_running():
666
- # ์ด๋ฏธ ์‹คํ–‰ ์ค‘์ธ ๋ฃจํ”„๊ฐ€ ์žˆ์œผ๋ฉด ์ƒˆ ์Šค๋ ˆ๋“œ์—์„œ ์‹คํ–‰
667
- import concurrent.futures
668
- with concurrent.futures.ThreadPoolExecutor() as executor:
669
- future = executor.submit(asyncio.run, coro)
670
- return future.result()
671
- else:
672
- return loop.run_until_complete(coro)
673
- except RuntimeError:
674
- # ๋ฃจํ”„๊ฐ€ ์—†์œผ๋ฉด ์ƒˆ๋กœ ์ƒ์„ฑ
675
- return asyncio.run(coro)
676
 
677
  try:
678
  # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
679
  search_results = []
680
  search_display = ""
681
 
 
 
 
682
  if use_search:
683
  # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
 
 
 
 
 
 
684
  history_with_message = history + [
685
  {"role": "user", "content": message},
686
- {"role": "assistant", "content": "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘..."}
687
  ]
688
  yield history_with_message, "", ""
689
 
690
  # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
691
- search_results = run_async_function(
692
- system.search.search_async(message, count=search_count)
693
- )
 
 
 
694
 
695
  if search_results:
696
- search_display = "## ๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ\n\n"
 
 
 
 
 
 
697
  for i, result in enumerate(search_results[:3], 1):
698
  search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
699
  search_display += f" {result['description'][:100]}...\n\n"
@@ -701,37 +1028,55 @@ def create_optimized_gradio_interface():
701
  # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
702
  current_history = history + [{"role": "user", "content": message}]
703
 
704
- # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹คํ–‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์ˆ˜์ง‘
705
- async def collect_responses():
706
- responses = []
707
  async for response, thoughts in system.parallel_process_agents(
708
  query=message,
709
  search_results=search_results,
710
- show_progress=show_agent_thoughts
 
711
  ):
712
- responses.append((response, thoughts))
713
- return responses
714
 
715
- # ๋ชจ๋“  ์‘๋‹ต ์ˆ˜์ง‘
716
- all_responses = run_async_function(collect_responses())
 
717
 
718
- # ์ˆ˜์ง‘๋œ ์‘๋‹ต์„ yield
719
- for response, thoughts in all_responses:
720
- updated_history = current_history + [
721
- {"role": "assistant", "content": response}
722
- ]
723
- yield updated_history, thoughts, search_display
 
 
 
 
 
 
 
 
 
 
 
724
 
725
  except Exception as e:
726
  error_history = history + [
727
  {"role": "user", "content": message},
728
- {"role": "assistant", "content": f"โŒ ์˜ค๋ฅ˜: {str(e)}"}
729
  ]
730
  yield error_history, "", ""
 
 
 
 
 
 
731
 
732
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
733
  with gr.Blocks(
734
- title="โšก Speed-Optimized Multi-Agent System",
735
  theme=gr.themes.Soft(),
736
  css="""
737
  .gradio-container {
@@ -741,50 +1086,57 @@ def create_optimized_gradio_interface():
741
  """
742
  ) as demo:
743
  gr.Markdown("""
744
- # โšก ๊ณ ์† Multi-Agent RAG System
745
- ### ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ 5์ดˆ ์ด๋‚ด ์ฒ˜๋ฆฌ ๋ชฉํ‘œ
746
-
747
- **์ตœ์ ํ™” ๊ธฐ์ˆ :**
748
- - ๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ: ์—์ด์ „ํŠธ ๋™์‹œ ์‹คํ–‰
749
- - ๐Ÿ’พ ์Šค๋งˆํŠธ ์บ์‹ฑ: ์ž์ฃผ ๋ฌป๋Š” ํŒจํ„ด ์ฆ‰์‹œ ์‘๋‹ต
750
- - โšก ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ: ๋„คํŠธ์›Œํฌ ์ตœ์ ํ™”
751
- - ๐ŸŽฏ ์กฐ๊ธฐ ์ข…๋ฃŒ: ํ’ˆ์งˆ ์ถฉ์กฑ ์‹œ ์ฆ‰์‹œ ์™„๋ฃŒ
 
752
  """)
753
 
754
  with gr.Row():
755
  with gr.Column(scale=3):
756
  chatbot = gr.Chatbot(
757
  height=500,
758
- label="๐Ÿ’ฌ ๋Œ€ํ™”",
759
  type="messages"
760
  )
761
 
762
  msg = gr.Textbox(
763
- label="๋ณต์žกํ•œ ์งˆ๋ฌธ ์ž…๋ ฅ",
764
- placeholder="๋ถ„์„, ์ „๋žต, ์ฐฝ์˜์  ํ•ด๊ฒฐ์ด ํ•„์š”ํ•œ ๋ณต์žกํ•œ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...",
765
  lines=3
766
  )
767
 
768
  with gr.Row():
769
- submit = gr.Button("โšก ๊ณ ์† ์ฒ˜๋ฆฌ", variant="primary")
770
- clear = gr.Button("๐Ÿ”„ ์ดˆ๊ธฐํ™”")
771
 
772
- with gr.Accordion("๐Ÿค– ์—์ด์ „ํŠธ ์ฒ˜๋ฆฌ ๊ณผ์ •", open=False):
773
  agent_thoughts = gr.Markdown()
774
 
775
- with gr.Accordion("๐Ÿ“š ๊ฒ€์ƒ‰ ์†Œ์Šค", open=False):
776
  search_sources = gr.Markdown()
777
 
778
  with gr.Column(scale=1):
779
- gr.Markdown("### โš™๏ธ ์„ค์ •")
 
 
 
 
 
 
780
 
781
  use_search = gr.Checkbox(
782
- label="๐Ÿ” ์›น ๊ฒ€์ƒ‰ ์‚ฌ์šฉ",
783
  value=True
784
  )
785
 
786
  show_agent_thoughts = gr.Checkbox(
787
- label="๐Ÿง  ์ฒ˜๋ฆฌ ๊ณผ์ • ํ‘œ์‹œ",
788
  value=True
789
  )
790
 
@@ -793,33 +1145,40 @@ def create_optimized_gradio_interface():
793
  maximum=10,
794
  value=5,
795
  step=1,
796
- label="๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ˆ˜"
797
  )
798
 
799
  gr.Markdown("""
800
- ### โšก ์ตœ์ ํ™” ์ƒํƒœ
801
-
802
- **ํ™œ์„ฑํ™”๋œ ์ตœ์ ํ™”:**
803
- - โœ… ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ
804
- - โœ… ์Šค๋งˆํŠธ ์บ์‹ฑ
805
- - โœ… ๋ฒ„ํผ ์ŠคํŠธ๋ฆฌ๋ฐ
806
- - โœ… ์กฐ๊ธฐ ์ข…๋ฃŒ
807
- - โœ… ์••์ถ• ํ”„๋กฌํ”„ํŠธ
808
-
809
- **์˜ˆ์ƒ ์ฒ˜๋ฆฌ ์‹œ๊ฐ„:**
810
- - ์บ์‹œ ํžˆํŠธ: < 1์ดˆ
811
- - ์ผ๋ฐ˜ ์งˆ๋ฌธ: 3-5์ดˆ
812
- - ๋ณต์žกํ•œ ์งˆ๋ฌธ: 5-8์ดˆ
 
 
813
  """)
814
 
815
- # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ
816
  gr.Examples(
817
  examples=[
 
818
  "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
819
  "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
820
- "๊ธฐํ›„๋ณ€ํ™” ๋Œ€์‘์„ ์œ„ํ•œ ์ฐฝ์˜์ ์ธ ๋น„์ฆˆ๋‹ˆ์Šค ๋ชจ๋ธ 5๊ฐ€์ง€๋ฅผ ๊ตฌ์ฒด์ ์œผ๋กœ ์„ค๊ณ„ํ•ด์ค˜",
821
- "์–‘์ž์ปดํ“จํ„ฐ๊ฐ€ ํ˜„์žฌ ์•”ํ˜ธํ™” ์ฒด๊ณ„์— ๋ฏธ์น  ์˜ํ–ฅ๊ณผ ๋Œ€์•ˆ์„ ๊ธฐ์ˆ ์ ์œผ๋กœ ๋ถ„์„ํ•ด์ค˜",
822
- "๋ฉ”ํƒ€๋ฒ„์Šค ์‹œ๋Œ€์˜ ๊ต์œก ํ˜์‹  ๋ฐฉ์•ˆ์„ ์‹ค์ œ ๊ตฌํ˜„ ๊ฐ€๋Šฅํ•œ ์ˆ˜์ค€์œผ๋กœ ์ œ์•ˆํ•ด์ค˜"
 
 
 
 
823
  ],
824
  inputs=msg
825
  )
@@ -827,7 +1186,7 @@ def create_optimized_gradio_interface():
827
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
828
  submit.click(
829
  process_query_optimized,
830
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
831
  outputs=[chatbot, agent_thoughts, search_sources]
832
  ).then(
833
  lambda: "",
@@ -837,7 +1196,7 @@ def create_optimized_gradio_interface():
837
 
838
  msg.submit(
839
  process_query_optimized,
840
- inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count],
841
  outputs=[chatbot, agent_thoughts, search_sources]
842
  ).then(
843
  lambda: "",
@@ -861,25 +1220,25 @@ def create_optimized_gradio_interface():
861
  if __name__ == "__main__":
862
  print("""
863
  โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
864
- โ•‘ โšก Speed-Optimized Multi-Agent RAG System โšก โ•‘
865
  โ•‘ โ•‘
866
- โ•‘ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ 5์ดˆ ์ด๋‚ด ์ฒ˜๋ฆฌํ•˜๋Š” ๊ณ ์† AI ์‹œ์Šคํ…œ โ•‘
867
  โ•‘ โ•‘
868
- โ•‘ ์ตœ์ ํ™” ๊ธฐ์ˆ : โ•‘
869
- โ•‘ โ€ข ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ โ•‘
870
- โ•‘ โ€ข ์Šค๋งˆํŠธ ์บ์‹ฑ ์‹œ์Šคํ…œ โ•‘
871
- โ•‘ โ€ข ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™” โ•‘
872
- โ•‘ โ€ข ํ’ˆ์งˆ ๊ธฐ๋ฐ˜ ์กฐ๊ธฐ ์ข…๋ฃŒ โ•‘
873
- โ•‘ โ€ข ์••์ถ• ํ”„๋กฌํ”„ํŠธ ์—”์ง€๋‹ˆ์–ด๋ง โ•‘
874
  โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
875
  """)
876
 
877
  # API ํ‚ค ํ™•์ธ
878
  if not os.getenv("FIREWORKS_API_KEY"):
879
- print("\nโš ๏ธ FIREWORKS_API_KEY๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
880
 
881
  if not os.getenv("BRAVE_SEARCH_API_KEY"):
882
- print("\nโš ๏ธ BRAVE_SEARCH_API_KEY๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
883
 
884
  # Gradio ์•ฑ ์‹คํ–‰
885
  demo = create_optimized_gradio_interface()
@@ -887,8 +1246,8 @@ if __name__ == "__main__":
887
  is_hf_spaces = os.getenv("SPACE_ID") is not None
888
 
889
  if is_hf_spaces:
890
- print("\n๐Ÿค— Hugging Face Spaces์—์„œ ์ตœ์ ํ™” ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘...")
891
  demo.launch(server_name="0.0.0.0", server_port=7860)
892
  else:
893
- print("\n๐Ÿ’ป ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ ์ตœ์ ํ™” ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘...")
894
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
1
  """
2
  โšก Speed-Optimized Multi-Agent RAG System for Complex Questions
3
+ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ, ๋™์  ํŒŒ์ดํ”„๋ผ์ธ์œผ๋กœ ๋ณต์žกํ•œ ์งˆ๋ฌธ๋„ ๋น ๋ฅด๊ฒŒ ์ฒ˜๋ฆฌ
4
+ Enhanced with multi-language support and improved error handling
5
+ (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ ๋ฒ„์ „)
6
  """
7
 
8
  import os
 
10
  import time
11
  import asyncio
12
  import hashlib
13
+ import re
14
+ import sys
15
  from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator
16
  from datetime import datetime, timedelta
17
  from enum import Enum
 
63
 
64
 
65
  # ============================================================================
66
+ # ์–ธ์–ด ๊ฐ์ง€ ์œ ํ‹ธ๋ฆฌํ‹ฐ
67
  # ============================================================================
68
 
69
class LanguageDetector:
    """Heuristic script-based language identification for short user queries.

    Distinguishes Korean / Japanese / Chinese / English by the Unicode
    script of the characters present; anything unrecognised falls back
    to ``'en'``.
    """

    # Hangul syllables U+AC00-U+D7A3 (equivalent to the literal range 가-힣).
    _KOREAN = re.compile(r'[\uac00-\ud7a3]+')
    # Hiragana runs (U+3041-U+3093) or katakana runs (U+30A1-U+30F4,
    # plus the long-vowel mark U+30FC).
    _JAPANESE = re.compile(r'[\u3041-\u3093]+|[\u30a1-\u30f4\u30fc]+')
    # CJK Unified Ideographs.
    _CHINESE = re.compile(r'[\u4e00-\u9fff]+')

    @staticmethod
    def detect_language(text: str) -> str:
        """Return ``'ko'``, ``'ja'``, ``'zh'`` or ``'en'`` for *text*.

        Bug fix vs. the original: character counts are now the sum of the
        lengths of the matched runs.  ``len(pattern.findall(text))`` counts
        the number of *runs*, not characters, which under-counted Korean in
        mixed-language text (e.g. one long Hangul run inside an English
        sentence) and misrouted such queries to ``'en'``.
        """
        text_length = len(text)
        if text_length == 0:
            return 'en'

        korean_chars = sum(len(run) for run in LanguageDetector._KOREAN.findall(text))
        japanese_chars = sum(len(run) for run in LanguageDetector._JAPANESE.findall(text))
        chinese_chars = sum(len(run) for run in LanguageDetector._CHINESE.findall(text))

        # Korean wins when Hangul makes up more than 10% of the text.
        if korean_chars > 0 and (korean_chars / text_length > 0.1):
            return 'ko'
        # Any kana at all marks the text as Japanese.
        elif japanese_chars > 0:
            return 'ja'
        # Otherwise any CJK ideograph marks it as Chinese
        # (kanji-only Japanese will land here too — inherent to the heuristic).
        elif chinese_chars > 0:
            return 'zh'
        else:
            return 'en'
104
 
105
 
106
  # ============================================================================
107
+ # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์ตœ์ ํ™” Brave Search (๊ฐœ์„ ๋จ)
108
  # ============================================================================
109
 
110
  class AsyncBraveSearch:
111
+ """๋น„๋™๊ธฐ Brave ๊ฒ€์ƒ‰ ํด๋ผ์ด์–ธํŠธ with retry logic"""
112
 
113
  def __init__(self, api_key: Optional[str] = None):
114
  self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY")
115
  self.base_url = "https://api.search.brave.com/res/v1/web/search"
116
+ self.max_retries = 3
117
 
118
+ async def search_async(self, query: str, count: int = 5, lang: str = 'ko') -> List[Dict]:
119
+ """๋น„๋™๊ธฐ ๊ฒ€์ƒ‰ with retry"""
120
  if not self.api_key:
121
  return []
122
 
 
125
  "X-Subscription-Token": self.api_key
126
  }
127
 
128
+ # ์–ธ์–ด๋ณ„ ํŒŒ๋ผ๋ฏธํ„ฐ ์„ค์ •
129
+ lang_params = {
130
+ 'ko': {"search_lang": "ko", "country": "KR"},
131
+ 'en': {"search_lang": "en", "country": "US"},
132
+ 'ja': {"search_lang": "ja", "country": "JP"},
133
+ 'zh': {"search_lang": "zh", "country": "CN"}
134
+ }
135
+
136
  params = {
137
  "q": query,
138
  "count": count,
139
  "text_decorations": False,
140
+ **lang_params.get(lang, lang_params['en'])
 
141
  }
142
 
143
+ for attempt in range(self.max_retries):
144
+ try:
145
+ async with aiohttp.ClientSession() as session:
146
+ async with session.get(
147
+ self.base_url,
148
+ headers=headers,
149
+ params=params,
150
+ timeout=aiohttp.ClientTimeout(total=5)
151
+ ) as response:
152
+ if response.status == 200:
153
+ data = await response.json()
154
+
155
+ results = []
156
+ if "web" in data and "results" in data["web"]:
157
+ for item in data["web"]["results"][:count]:
158
+ results.append({
159
+ "title": item.get("title", ""),
160
+ "url": item.get("url", ""),
161
+ "description": item.get("description", ""),
162
+ "age": item.get("age", "")
163
+ })
164
+
165
+ return results
166
+ elif response.status == 429: # Rate limit
167
+ await asyncio.sleep(2 ** attempt)
168
+ continue
169
+ except aiohttp.ClientError as e:
170
+ if attempt < self.max_retries - 1:
171
+ await asyncio.sleep(2 ** attempt) # Exponential backoff
172
+ continue
173
+ except Exception:
174
+ pass
175
 
176
  return []
177
+
178
async def batch_search(self, queries: List[str], lang: str = 'ko') -> List[List[Dict]]:
    """Fan several queries out concurrently and gather their hit lists.

    A query whose coroutine raises is mapped to an empty hit list, so a
    single failing search never aborts the whole batch.
    """
    pending = [self.search_async(query, lang=lang) for query in queries]
    outcomes = await asyncio.gather(*pending, return_exceptions=True)
    hit_lists = []
    for outcome in outcomes:
        hit_lists.append([] if isinstance(outcome, Exception) else outcome)
    return hit_lists
184
 
185
 
186
  # ============================================================================
187
+ # ์ตœ์ ํ™”๋œ Fireworks ํด๋ผ์ด์–ธํŠธ (๊ฐœ์„ ๋จ)
188
  # ============================================================================
189
 
190
  class OptimizedFireworksClient:
191
+ """์ตœ์ ํ™”๋œ LLM ํด๋ผ์ด์–ธํŠธ with language support"""
192
 
193
  def __init__(self, api_key: Optional[str] = None):
194
  self.api_key = api_key or os.getenv("FIREWORKS_API_KEY")
 
204
 
205
  # ํ•ญ์ƒ ์ตœ๊ณ  ์„ฑ๋Šฅ ๋ชจ๋ธ ์‚ฌ์šฉ (๋ณต์žกํ•œ ์งˆ๋ฌธ ์ „์ œ)
206
  self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
207
+ self.max_retries = 3
208
+
209
def compress_prompt(self, text: str, max_length: int = 2000) -> str:
    """Compress *text* to at most roughly ``max_length`` characters.

    Keeps whole sentences (split on ``'.'``) from the start of the text
    until the character budget is exhausted.

    Bug fix vs. the original: when even the first sentence exceeded the
    budget, the loop kept nothing and an *empty string* was returned,
    silently dropping the entire prompt.  We now fall back to a hard
    character truncation so non-empty input always yields non-empty output.
    """
    if len(text) <= max_length:
        return text

    sentences = text.split('.')
    kept = []
    used = 0
    for sentence in sentences:
        if used + len(sentence) > max_length:
            break
        kept.append(sentence)
        used += len(sentence)

    if not kept:
        # First sentence alone is over budget -> hard truncate instead of
        # returning an empty string.
        return text[:max_length]
    return '.'.join(kept)
226
 
227
  async def chat_stream_async(
228
  self,
229
  messages: List[Dict],
230
  **kwargs
231
  ) -> AsyncGenerator[str, None]:
232
+ """๋น„๋™๊ธฐ ์ŠคํŠธ๋ฆฌ๋ฐ ๋Œ€ํ™” with retry"""
233
 
234
  payload = {
235
  "model": self.model,
 
241
  "stream": True
242
  }
243
 
244
+ for attempt in range(self.max_retries):
245
+ try:
246
+ async with aiohttp.ClientSession() as session:
247
+ async with session.post(
248
+ self.base_url,
249
+ headers={**self.headers, "Accept": "text/event-stream"},
250
+ json=payload,
251
+ timeout=aiohttp.ClientTimeout(total=30)
252
+ ) as response:
253
+ async for line in response.content:
254
+ line_str = line.decode('utf-8').strip()
255
+ if line_str.startswith("data: "):
256
+ data_str = line_str[6:]
257
+ if data_str == "[DONE]":
258
+ break
259
+ try:
260
+ data = json.loads(data_str)
261
+ if "choices" in data and len(data["choices"]) > 0:
262
+ delta = data["choices"][0].get("delta", {})
263
+ if "content" in delta:
264
+ yield delta["content"]
265
+ except json.JSONDecodeError:
266
+ continue
267
+ return # Success
268
+ except aiohttp.ClientError as e:
269
+ if attempt < self.max_retries - 1:
270
+ await asyncio.sleep(2 ** attempt)
271
+ continue
272
+ else:
273
+ yield f"Error after {self.max_retries} attempts: {str(e)}"
274
+ except Exception as e:
275
+ yield f"Unexpected error: {str(e)}"
276
+ break
277
 
278
 
279
  # ============================================================================
280
+ # ๊ฒฝ๋Ÿ‰ํ™”๋œ ์ถ”๋ก  ์ฒด์ธ (๋‹ค๊ตญ์–ด ์ง€์›)
281
  # ============================================================================
282
 
283
class LightweightReasoningChain:
    """Static store of reasoning-structure templates, keyed by language.

    Only 'ko' and 'en' templates exist; 'ja'/'zh' callers fall back to 'en'.
    (NOTE(review): the original class docstring line is not visible in this
    chunk; this summary is reconstructed from the visible code.)
    """

    def __init__(self):
        # Per-language reasoning templates: each entry carries the ordered
        # step labels plus the instruction sentence injected into prompts.
        self.templates = {
            "ko": {
                "problem_solving": {
                    "steps": ["๋ฌธ์ œ ๋ถ„ํ•ด", "ํ•ต์‹ฌ ์š”์ธ", "ํ•ด๊ฒฐ ๋ฐฉ์•ˆ", "๊ตฌํ˜„ ์ „๋žต"],
                    "prompt": "์ฒด๊ณ„์ ์œผ๋กœ ๋‹จ๊ณ„๋ณ„๋กœ ๋ถ„์„ํ•˜๊ณ  ํ•ด๊ฒฐ์ฑ…์„ ์ œ์‹œํ•˜์„ธ์š”."
                },
                "creative_thinking": {
                    "steps": ["๊ธฐ์กด ์ ‘๊ทผ", "์ฐฝ์˜์  ๋Œ€์•ˆ", "ํ˜์‹  ํฌ์ธํŠธ", "์‹คํ–‰ ๋ฐฉ๋ฒ•"],
                    "prompt": "๊ธฐ์กด ๋ฐฉ์‹์„ ๋„˜์–ด์„  ์ฐฝ์˜์ ์ด๊ณ  ํ˜์‹ ์ ์ธ ์ ‘๊ทผ์„ ์ œ์‹œํ•˜์„ธ์š”."
                },
                "critical_analysis": {
                    "steps": ["ํ˜„ํ™ฉ ํ‰๊ฐ€", "๊ฐ•์ /์•ฝ์ ", "๊ธฐํšŒ/์œ„ํ˜‘", "๊ฐœ์„  ๋ฐฉํ–ฅ"],
                    "prompt": "๋น„ํŒ์  ๊ด€์ ์—์„œ ์ฒ ์ €ํžˆ ๋ถ„์„ํ•˜๊ณ  ๊ฐœ์„ ์ ์„ ๋„์ถœํ•˜์„ธ์š”."
                }
            },
            "en": {
                "problem_solving": {
                    "steps": ["Problem Breakdown", "Key Factors", "Solutions", "Implementation Strategy"],
                    "prompt": "Systematically analyze step by step and provide solutions."
                },
                "creative_thinking": {
                    "steps": ["Traditional Approach", "Creative Alternatives", "Innovation Points", "Execution Method"],
                    "prompt": "Provide creative and innovative approaches beyond conventional methods."
                },
                "critical_analysis": {
                    "steps": ["Current Assessment", "Strengths/Weaknesses", "Opportunities/Threats", "Improvement Direction"],
                    "prompt": "Thoroughly analyze from a critical perspective and derive improvements."
                }
            }
        }

    def get_reasoning_structure(self, query_type: str, lang: str = 'ko') -> Dict:
        """Return the reasoning structure for *query_type* in *lang*.

        Unknown languages fall back to 'en'; unknown query types fall back
        to 'problem_solving'.
        """
        lang_templates = self.templates.get(lang, self.templates['en'])
        return lang_templates.get(query_type, lang_templates["problem_solving"])

    def get_reasoning_pattern(self, query: str, lang: str = 'ko') -> Optional[Dict]:
        """Return the reasoning structure whose cue words appear in *query*.

        Despite the Optional annotation, this never returns None — it falls
        back to the 'problem_solving' structure when nothing matches.
        """
        query_lower = query.lower()

        # Per-language cue-word map; only 'ko'/'en' keys exist, others use 'en'.
        pattern_keywords = {
            'ko': {
                'problem_solving': ['ํ•ด๊ฒฐ', '๋ฐฉ๋ฒ•', '์ „๋žต', '๊ณ„ํš'],
                'creative_thinking': ['์ฐฝ์˜์ ', 'ํ˜์‹ ์ ', '์ƒˆ๋กœ์šด', '์•„์ด๋””์–ด'],
                'critical_analysis': ['๋ถ„์„', 'ํ‰๊ฐ€', '๋น„๊ต', '์˜ํ–ฅ']
            },
            'en': {
                'problem_solving': ['solve', 'solution', 'strategy', 'plan'],
                'creative_thinking': ['creative', 'innovative', 'novel', 'idea'],
                'critical_analysis': ['analyze', 'evaluate', 'compare', 'impact']
            }
        }

        keywords = pattern_keywords.get(lang, pattern_keywords['en'])

        # First pattern (in insertion order) with any cue word wins.
        for pattern_type, words in keywords.items():
            if any(word in query_lower for word in words):
                return self.get_reasoning_structure(pattern_type, lang)

        return self.get_reasoning_structure('problem_solving', lang)
348
 
349
 
350
  # ============================================================================
351
+ # ์กฐ๊ธฐ ์ข…๋ฃŒ ๋ฉ”์ปค๋‹ˆ์ฆ˜ (๊ฐœ์„ ๋จ)
352
  # ============================================================================
353
 
354
  class QualityChecker:
 
363
  "clarity": 0.2
364
  }
365
 
366
+ def evaluate_response(self, response: str, query: str, lang: str = 'ko') -> Tuple[float, bool]:
367
+ """์‘๋‹ต ํ’ˆ์งˆ ํ‰๊ฐ€ (์–ธ์–ด๋ณ„)"""
368
  scores = {}
369
 
370
+ # ์–ธ์–ด๋ณ„ ์ตœ์†Œ ๊ธธ์ด ๊ธฐ์ค€
371
+ min_length = {'ko': 500, 'en': 400, 'ja': 400, 'zh': 300}
372
+ target_length = min_length.get(lang, 400)
373
+
374
  # ๊ธธ์ด ํ‰๊ฐ€
375
+ scores["length"] = min(len(response) / target_length, 1.0)
376
 
377
+ # ๊ตฌ์กฐ ํ‰๊ฐ€ (์–ธ์–ด๋ณ„ ๋งˆ์ปค)
378
+ structure_markers = {
379
+ 'ko': ["1.", "2.", "โ€ข", "-", "์ฒซ์งธ", "๋‘˜์งธ", "๊ฒฐ๋ก ", "์š”์•ฝ"],
380
+ 'en': ["1.", "2.", "โ€ข", "-", "First", "Second", "Conclusion", "Summary"],
381
+ 'ja': ["1.", "2.", "โ€ข", "-", "็ฌฌไธ€", "็ฌฌไบŒ", "็ต่ซ–", "่ฆ็ด„"],
382
+ 'zh': ["1.", "2.", "โ€ข", "-", "็ฌฌไธ€", "็ฌฌไบŒ", "็ป“่ฎบ", "ๆ€ป็ป“"]
383
+ }
384
+
385
+ markers = structure_markers.get(lang, structure_markers['en'])
386
+ scores["structure"] = sum(1 for m in markers if m in response) / len(markers)
387
 
388
  # ์™„์ „์„ฑ ํ‰๊ฐ€ (์ฟผ๋ฆฌ ํ‚ค์›Œ๋“œ ํฌํ•จ ์—ฌ๋ถ€)
389
  query_words = set(query.split())
 
391
  scores["completeness"] = len(query_words & response_words) / max(len(query_words), 1)
392
 
393
  # ๋ช…ํ™•์„ฑ ํ‰๊ฐ€ (๋ฌธ์žฅ ๊ตฌ์กฐ)
394
+ sentence_delimiters = {
395
+ 'ko': '.',
396
+ 'en': '.',
397
+ 'ja': 'ใ€‚',
398
+ 'zh': 'ใ€‚'
399
+ }
400
+ delimiter = sentence_delimiters.get(lang, '.')
401
+ sentences = response.split(delimiter)
402
  avg_sentence_length = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
403
+ scores["clarity"] = min(avg_sentence_length / 20, 1.0)
404
 
405
  # ๊ฐ€์ค‘ ํ‰๊ท  ๊ณ„์‚ฐ
406
  total_score = sum(
 
414
 
415
 
416
  # ============================================================================
417
+ # ์ŠคํŠธ๋ฆฌ๋ฐ ์ตœ์ ํ™” (๊ฐœ์„ ๋จ)
418
  # ============================================================================
419
 
420
  class OptimizedStreaming:
421
+ """์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„ํผ ์ตœ์ ํ™” with adaptive buffering"""
422
 
423
+ def __init__(self, chunk_size: int = 20, flush_interval: float = 0.05):
424
  self.chunk_size = chunk_size
425
  self.flush_interval = flush_interval
426
  self.buffer = ""
427
  self.last_flush = time.time()
428
+ self.adaptive_size = chunk_size
429
 
430
  async def buffer_and_yield(
431
  self,
432
+ stream: AsyncGenerator[str, None],
433
+ adaptive: bool = True
434
  ) -> AsyncGenerator[str, None]:
435
+ """๋ฒ„ํผ๋ง๋œ ์ŠคํŠธ๋ฆฌ๋ฐ with adaptive sizing"""
436
 
437
+ chunk_count = 0
438
  async for chunk in stream:
439
  self.buffer += chunk
440
  current_time = time.time()
441
+ chunk_count += 1
442
 
443
+ # Adaptive chunk size based on stream speed
444
+ if adaptive and chunk_count % 10 == 0:
445
+ time_diff = current_time - self.last_flush
446
+ if time_diff < 0.02: # Too fast, increase buffer
447
+ self.adaptive_size = min(self.adaptive_size + 5, 100)
448
+ elif time_diff > 0.1: # Too slow, decrease buffer
449
+ self.adaptive_size = max(self.adaptive_size - 5, 10)
450
+
451
+ if (len(self.buffer) >= self.adaptive_size or
452
  current_time - self.last_flush >= self.flush_interval):
453
 
454
  yield self.buffer
 
461
 
462
 
463
  # ============================================================================
464
+ # ์‘๋‹ต ํ›„์ฒ˜๋ฆฌ ์œ ํ‹ธ๋ฆฌํ‹ฐ
465
+ # ============================================================================
466
+
467
class ResponseCleaner:
    """Post-processing helpers that strip markup noise from model output."""

    @staticmethod
    def clean_response(response: str) -> str:
        """Remove markdown headers, rules, and known junk fragments, then trim."""
        # 1) Markdown headers ("# ..." through "###### ...") at line starts.
        text = re.sub(r'^#{1,6}\s+', '', response, flags=re.MULTILINE)
        # 2) Bold/underscore runs and horizontal rules.
        text = re.sub(r'\*{2,}|_{2,}|-{3,}', '', text)
        # 3) Collapse runs of 3+ newlines into a single blank line.
        text = re.sub(r'\n{3,}', '\n\n', text)

        # 4) Specific junk fragments the agents are known to emit.
        junk_patterns = (
            r'\| --- # ๐ŸŒฑ \*\*์ตœ์ข…ํ†ตํ•ฉ ๋‹ต๋ณ€:',
            r'\*\*โ€“์˜ค๋ฅ˜: ---',
            r'^\s*\*\*\[.*?\]\*\*\s*',   # "[tag]"-style bold prefixes
            r'^\s*###\s*',
            r'^\s*##\s*',
            r'^\s*#\s*',
        )
        for junk in junk_patterns:
            text = re.sub(junk, '', text, flags=re.MULTILINE)

        return text.strip()
496
+
497
+
498
+ # ============================================================================
499
+ # ํ†ตํ•ฉ ์ตœ์ ํ™” ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
500
  # ============================================================================
501
 
502
  class SpeedOptimizedMultiAgentSystem:
503
+ """์†๋„ ์ตœ์ ํ™”๋œ ๋ฉ€ํ‹ฐ ์—์ด์ „ํŠธ ์‹œ์Šคํ…œ (์บ์‹ฑ ์—†์Œ)"""
504
 
505
  def __init__(self):
506
  self.llm = OptimizedFireworksClient()
507
  self.search = AsyncBraveSearch()
 
508
  self.reasoning = LightweightReasoningChain()
509
  self.quality_checker = QualityChecker()
510
  self.streaming = OptimizedStreaming()
511
+ self.language_detector = LanguageDetector()
512
+ self.response_cleaner = ResponseCleaner()
 
513
 
514
  # ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํ’€
515
  self.executor = ThreadPoolExecutor(max_workers=4)
516
 
517
+ def _init_compact_prompts(self, lang: str = 'ko') -> Dict:
518
+ """์••์ถ•๋œ ๊ณ ํšจ์œจ ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)"""
519
+ prompts = {
520
+ 'ko': {
521
+ AgentRole.SUPERVISOR: """[๊ฐ๋…์ž-๊ตฌ์กฐ์„ค๊ณ„]
522
  ์ฆ‰์‹œ๋ถ„์„: ํ•ต์‹ฌ์˜๋„+ํ•„์š”์ •๋ณด+๋‹ต๋ณ€๊ตฌ์กฐ
523
  ์ถœ๋ ฅ: 5๊ฐœ ํ•ต์‹ฌํฌ์ธํŠธ(๊ฐ 1๋ฌธ์žฅ)
524
  ์ถ”๋ก ์ฒด๊ณ„ ๋ช…์‹œ""",
525
+
526
+ AgentRole.CREATIVE: """[์ฐฝ์˜์„ฑ์ƒ์„ฑ์ž]
527
  ์ž…๋ ฅ๊ตฌ์กฐ ๋”ฐ๋ผ ์ฐฝ์˜์  ํ™•์žฅ
528
  ์‹ค์šฉ์˜ˆ์‹œ+ํ˜์‹ ์ ‘๊ทผ+๊ตฌ์ฒด์กฐ์–ธ
529
  ๋ถˆํ•„์š”์„ค๋ช… ์ œ๊ฑฐ""",
530
+
531
+ AgentRole.CRITIC: """[๋น„ํ‰์ž-๊ฒ€์ฆ]
532
  ์‹ ์†๊ฒ€ํ† : ์ •ํ™•์„ฑ/๋…ผ๋ฆฌ์„ฑ/์‹ค์šฉ์„ฑ
533
  ๊ฐœ์„ ํฌ์ธํŠธ 3๊ฐœ๋งŒ
534
  ๊ฐ 2๋ฌธ์žฅ ์ด๋‚ด""",
535
+
536
+ AgentRole.FINALIZER: """[์ตœ์ข…ํ†ตํ•ฉ]
537
  ๋ชจ๋“ ์˜๊ฒฌ ์ข…ํ•ฉโ†’์ตœ์ ๋‹ต๋ณ€
538
  ๋ช…ํ™•๊ตฌ์กฐ+์‹ค์šฉ์ •๋ณด+์ฐฝ์˜๊ท ํ˜•
539
+ ๋ฐ”๋กœ ํ•ต์‹ฌ ๋‚ด์šฉ๋ถ€ํ„ฐ ์‹œ์ž‘. ๋ถˆํ•„์š”ํ•œ ํ—ค๋”๋‚˜ ๋งˆํฌ์—… ์—†์ด. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€."""
540
+ },
541
+ 'en': {
542
+ AgentRole.SUPERVISOR: """[Supervisor-Structure]
543
+ Immediate analysis: core intent+required info+answer structure
544
+ Output: 5 key points (1 sentence each)
545
+ Clear reasoning framework""",
546
+
547
+ AgentRole.CREATIVE: """[Creative Generator]
548
+ Follow structure, expand creatively
549
+ Practical examples+innovative approach+specific advice
550
+ Remove unnecessary explanations""",
551
+
552
+ AgentRole.CRITIC: """[Critic-Verification]
553
+ Quick review: accuracy/logic/practicality
554
+ Only 3 improvement points
555
+ Max 2 sentences each""",
556
+
557
+ AgentRole.FINALIZER: """[Final Integration]
558
+ Synthesize all inputsโ†’optimal answer
559
+ Clear structure+practical info+creative balance
560
+ Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###)."""
561
+ },
562
+ 'ja': {
563
+ AgentRole.SUPERVISOR: """[็›ฃ็ฃ่€…-ๆง‹้€ ่จญ่จˆ]
564
+ ๅณๆ™‚ๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›ณ+ๅฟ…่ฆๆƒ…ๅ ฑ+ๅ›ž็ญ”ๆง‹้€ 
565
+ ๅ‡บๅŠ›๏ผš5ใคใฎๆ ธๅฟƒใƒใ‚คใƒณใƒˆ๏ผˆๅ„1ๆ–‡๏ผ‰
566
+ ๆŽจ่ซ–ไฝ“็ณปๆ˜Ž็คบ""",
567
+
568
+ AgentRole.CREATIVE: """[ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…]
569
+ ๅ…ฅๅŠ›ๆง‹้€ ใซๅพ“ใฃใฆๅ‰ต้€ ็š„ๆ‹กๅผต
570
+ ๅฎŸ็”จไพ‹+้ฉๆ–ฐ็š„ใ‚ขใƒ—ใƒญใƒผใƒ+ๅ…ทไฝ“็š„ใ‚ขใƒ‰ใƒใ‚คใ‚น
571
+ ไธ่ฆใช่ชฌๆ˜Žๅ‰Š้™ค""",
572
+
573
+ AgentRole.CRITIC: """[ๆ‰น่ฉ•่€…-ๆคœ่จผ]
574
+ ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผ๏ผšๆญฃ็ขบๆ€ง/่ซ–็†ๆ€ง/ๅฎŸ็”จๆ€ง
575
+ ๆ”นๅ–„ใƒใ‚คใƒณใƒˆ3ใคใฎใฟ
576
+ ๅ„2ๆ–‡ไปฅๅ†…""",
577
+
578
+ AgentRole.FINALIZER: """[ๆœ€็ต‚็ตฑๅˆ]
579
+ ๅ…จๆ„่ฆ‹็ตฑๅˆโ†’ๆœ€้ฉๅ›ž็ญ”
580
+ ๆ˜Ž็ขบๆง‹้€ +ๅฎŸ็”จๆƒ…ๅ ฑ+ๅ‰ต้€ ๆ€งใƒใƒฉใƒณใ‚น
581
+ ๆ ธๅฟƒๅ†…ๅฎนใ‹ใ‚‰็›ดๆŽฅ้–‹ๅง‹ใ€‚ไธ่ฆใชใƒ˜ใƒƒใƒ€ใƒผใ‚„ใƒžใƒผใ‚ฏใ‚ขใƒƒใƒ—ใชใ—ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚"""
582
+ },
583
+ 'zh': {
584
+ AgentRole.SUPERVISOR: """[ไธป็ฎก-็ป“ๆž„่ฎพ่ฎก]
585
+ ็ซ‹ๅณๅˆ†ๆž๏ผšๆ ธๅฟƒๆ„ๅ›พ+ๆ‰€้œ€ไฟกๆฏ+็ญ”ๆกˆ็ป“ๆž„
586
+ ่พ“ๅ‡บ๏ผš5ไธชๆ ธๅฟƒ่ฆ็‚น๏ผˆๆฏไธช1ๅฅ๏ผ‰
587
+ ๆŽจ็†ไฝ“็ณปๆ˜Ž็กฎ""",
588
+
589
+ AgentRole.CREATIVE: """[ๅˆ›ๆ„็”Ÿๆˆๅ™จ]
590
+ ๆŒ‰็ป“ๆž„ๅˆ›้€ ๆ€งๆ‰ฉๅฑ•
591
+ ๅฎž็”จ็คบไพ‹+ๅˆ›ๆ–ฐๆ–นๆณ•+ๅ…ทไฝ“ๅปบ่ฎฎ
592
+ ๅˆ ้™คไธๅฟ…่ฆ็š„่งฃ้‡Š""",
593
+
594
+ AgentRole.CRITIC: """[่ฏ„่ฎบๅฎถ-้ชŒ่ฏ]
595
+ ๅฟซ้€ŸๅฎกๆŸฅ๏ผšๅ‡†็กฎๆ€ง/้€ป่พ‘ๆ€ง/ๅฎž็”จๆ€ง
596
+ ไป…3ไธชๆ”น่ฟ›็‚น
597
+ ๆฏไธชๆœ€ๅคš2ๅฅ""",
598
+
599
+ AgentRole.FINALIZER: """[ๆœ€็ปˆๆ•ดๅˆ]
600
+ ็ปผๅˆๆ‰€ๆœ‰ๆ„่งโ†’ๆœ€ไฝณ็ญ”ๆกˆ
601
+ ๆธ…ๆ™ฐ็ป“ๆž„+ๅฎž็”จไฟกๆฏ+ๅˆ›ๆ„ๅนณ่กก
602
+ ็›ดๆŽฅไปŽๆ ธๅฟƒๅ†…ๅฎนๅผ€ๅง‹ใ€‚ๆ— ้œ€ไธๅฟ…่ฆ็š„ๆ ‡้ข˜ๆˆ–ๆ ‡่ฎฐใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
603
+ }
604
  }
605
+
606
+ return prompts.get(lang, prompts['en'])
607
 
608
  async def parallel_process_agents(
609
  self,
610
  query: str,
611
  search_results: List[Dict],
612
+ show_progress: bool = True,
613
+ lang: str = None
614
  ) -> AsyncGenerator[Tuple[str, str], None]:
615
+ """๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ํŒŒ์ดํ”„๋ผ์ธ (์บ์‹ฑ ์—†์Œ)"""
616
 
617
  start_time = time.time()
618
+
619
+ # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€
620
+ if lang is None:
621
+ lang = self.language_detector.detect_language(query)
622
+
623
+ # ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
624
+ self.compact_prompts = self._init_compact_prompts(lang)
625
+
626
  search_context = self._format_search_results(search_results)
627
  accumulated_response = ""
628
  agent_thoughts = ""
629
 
 
 
 
 
 
 
630
  # ์ถ”๋ก  ํŒจํ„ด ๊ฒฐ์ •
631
+ reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang)
632
 
633
  try:
634
  # === 1๋‹จ๊ณ„: ๊ฐ๋…์ž + ๊ฒ€์ƒ‰ ๋ณ‘๋ ฌ ์‹คํ–‰ ===
635
  if show_progress:
636
+ progress_msg = {
637
+ 'ko': "๐Ÿš€ ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘\n๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„ + ๐Ÿ” ์ถ”๊ฐ€ ๊ฒ€์ƒ‰ ๋™์‹œ ์ง„ํ–‰...\n\n",
638
+ 'en': "๐Ÿš€ Starting parallel processing\n๐Ÿ‘” Supervisor analysis + ๐Ÿ” Additional search in progress...\n\n",
639
+ 'ja': "๐Ÿš€ ไธฆๅˆ—ๅ‡ฆ็†้–‹ๅง‹\n๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž + ๐Ÿ” ่ฟฝๅŠ ๆคœ็ดขๅŒๆ™‚้€ฒ่กŒไธญ...\n\n",
640
+ 'zh': "๐Ÿš€ ๅผ€ๅง‹ๅนถ่กŒๅค„็†\n๐Ÿ‘” ไธป็ฎกๅˆ†ๆž + ๐Ÿ” ้™„ๅŠ ๆœ็ดขๅŒๆ—ถ่ฟ›่กŒ...\n\n"
641
+ }
642
+ agent_thoughts = progress_msg.get(lang, progress_msg['en'])
643
  yield accumulated_response, agent_thoughts
644
 
645
+ # ๊ฐ๋…์ž ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
646
+ supervisor_prompt_templates = {
647
+ 'ko': f"""
648
  ์งˆ๋ฌธ: {query}
649
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
650
  ์ถ”๋ก ํŒจํ„ด: {reasoning_pattern}
651
+ ์ฆ‰์‹œ ํ•ต์‹ฌ๊ตฌ์กฐ 5๊ฐœ ์ œ์‹œ""",
652
+ 'en': f"""
653
+ Question: {query}
654
+ Search results: {search_context}
655
+ Reasoning pattern: {reasoning_pattern}
656
+ Immediately provide 5 key structures""",
657
+ 'ja': f"""
658
+ ่ณชๅ•: {query}
659
+ ๆคœ็ดข็ตๆžœ: {search_context}
660
+ ๆŽจ่ซ–ใƒ‘ใ‚ฟใƒผใƒณ: {reasoning_pattern}
661
+ ๅณๅบงใซ5ใคใฎๆ ธๅฟƒๆง‹้€ ใ‚’ๆ็คบ""",
662
+ 'zh': f"""
663
+ ้—ฎ้ข˜: {query}
664
+ ๆœ็ดข็ป“ๆžœ: {search_context}
665
+ ๆŽจ็†ๆจกๅผ: {reasoning_pattern}
666
+ ็ซ‹ๅณๆไพ›5ไธชๆ ธๅฟƒ็ป“ๆž„"""
667
+ }
668
+
669
+ supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en'])
670
 
671
  supervisor_response = ""
672
  supervisor_task = self.llm.chat_stream_async(
 
682
  async for chunk in self.streaming.buffer_and_yield(supervisor_task):
683
  supervisor_response += chunk
684
  if show_progress and len(supervisor_response) < 300:
685
+ supervisor_label = {
686
+ 'ko': "๐Ÿ‘” ๊ฐ๋…์ž ๋ถ„์„",
687
+ 'en': "๐Ÿ‘” Supervisor Analysis",
688
+ 'ja': "๐Ÿ‘” ็›ฃ็ฃ่€…ๅˆ†ๆž",
689
+ 'zh': "๐Ÿ‘” ไธป็ฎกๅˆ†ๆž"
690
+ }
691
+ agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n"
692
  yield accumulated_response, agent_thoughts
693
 
694
  # === 2๋‹จ๊ณ„: ์ฐฝ์˜์„ฑ + ๋น„ํ‰ ์ค€๋น„ ๋ณ‘๋ ฌ ===
695
  if show_progress:
696
+ creative_msg = {
697
+ 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž + ๐Ÿ” ๋น„ํ‰์ž ์ค€๋น„...\n\n",
698
+ 'en': "๐ŸŽจ Creative Generator + ๐Ÿ” Critic preparing...\n\n",
699
+ 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€… + ๐Ÿ” ๆ‰น่ฉ•่€…ๆบ–ๅ‚™ไธญ...\n\n",
700
+ 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ + ๐Ÿ” ่ฏ„่ฎบๅฎถๅ‡†ๅค‡ไธญ...\n\n"
701
+ }
702
+ agent_thoughts += creative_msg.get(lang, creative_msg['en'])
703
  yield accumulated_response, agent_thoughts
704
 
705
+ # ์ฐฝ์˜์„ฑ ์ƒ์„ฑ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
706
+ creative_prompt_templates = {
707
+ 'ko': f"""
708
  ์งˆ๋ฌธ: {query}
709
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
710
  ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {search_context}
711
+ ์ฐฝ์˜์ +์‹ค์šฉ์  ๋‹ต๋ณ€ ์ฆ‰์‹œ์ƒ์„ฑ""",
712
+ 'en': f"""
713
+ Question: {query}
714
+ Supervisor structure: {supervisor_response}
715
+ Search results: {search_context}
716
+ Generate creative+practical answer immediately""",
717
+ 'ja': f"""
718
+ ่ณชๅ•: {query}
719
+ ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
720
+ ๆคœ็ดข็ตๆžœ: {search_context}
721
+ ๅ‰ต้€ ็š„+ๅฎŸ็”จ็š„ๅ›ž็ญ”ๅณๅบง็”Ÿๆˆ""",
722
+ 'zh': f"""
723
+ ้—ฎ้ข˜: {query}
724
+ ไธป็ฎก็ป“ๆž„: {supervisor_response}
725
+ ๆœ็ดข็ป“ๆžœ: {search_context}
726
+ ็ซ‹ๅณ็”Ÿๆˆๅˆ›ๆ„+ๅฎž็”จ็ญ”ๆกˆ"""
727
+ }
728
+
729
+ creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en'])
730
 
731
  creative_response = ""
732
+ creative_partial = ""
733
  critic_started = False
734
  critic_response = ""
735
 
 
751
  if len(creative_partial) > 500 and not critic_started:
752
  critic_started = True
753
 
754
+ # ๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹œ์ž‘ (์–ธ์–ด๋ณ„)
755
+ critic_prompt_templates = {
756
+ 'ko': f"""
757
  ์›๋ณธ์งˆ๋ฌธ: {query}
758
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€(์ผ๋ถ€): {creative_partial}
759
+ ์‹ ์†๊ฒ€ํ† โ†’๊ฐœ์„ ์ 3๊ฐœ""",
760
+ 'en': f"""
761
+ Original question: {query}
762
+ Creative answer (partial): {creative_partial}
763
+ Quick reviewโ†’3 improvements""",
764
+ 'ja': f"""
765
+ ๅ…ƒใฎ่ณชๅ•: {query}
766
+ ๅ‰ต้€ ็š„ๅ›ž็ญ”๏ผˆไธ€้ƒจ๏ผ‰: {creative_partial}
767
+ ่ฟ…้€Ÿใƒฌใƒ“ใƒฅใƒผโ†’ๆ”นๅ–„็‚น3ใค""",
768
+ 'zh': f"""
769
+ ๅŽŸๅง‹้—ฎ้ข˜: {query}
770
+ ๅˆ›ๆ„็ญ”ๆกˆ๏ผˆ้ƒจๅˆ†๏ผ‰: {creative_partial}
771
+ ๅฟซ้€ŸๅฎกๆŸฅโ†’3ไธชๆ”น่ฟ›็‚น"""
772
+ }
773
+
774
+ critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en'])
775
 
776
  critic_task = asyncio.create_task(
777
  self._run_critic_async(critic_prompt)
 
779
 
780
  if show_progress:
781
  display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response
782
+ creative_label = {
783
+ 'ko': "๐ŸŽจ ์ฐฝ์˜์„ฑ ์ƒ์„ฑ์ž",
784
+ 'en': "๐ŸŽจ Creative Generator",
785
+ 'ja': "๐ŸŽจ ๅ‰ต้€ ๆ€ง็”Ÿๆˆ่€…",
786
+ 'zh': "๐ŸŽจ ๅˆ›ๆ„็”Ÿๆˆๅ™จ"
787
+ }
788
+ agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n"
789
  yield accumulated_response, agent_thoughts
790
 
791
  # ๋น„ํ‰์ž ๊ฒฐ๊ณผ ๋Œ€๊ธฐ
 
793
  critic_response = await critic_task
794
 
795
  if show_progress:
796
+ critic_label = {
797
+ 'ko': "๐Ÿ” ๋น„ํ‰์ž ๊ฒ€ํ† ",
798
+ 'en': "๐Ÿ” Critic Review",
799
+ 'ja': "๐Ÿ” ๆ‰น่ฉ•่€…ใƒฌใƒ“ใƒฅใƒผ",
800
+ 'zh': "๐Ÿ” ่ฏ„่ฎบๅฎถๅฎกๆŸฅ"
801
+ }
802
+ agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n"
803
  yield accumulated_response, agent_thoughts
804
 
805
  # === 3๋‹จ๊ณ„: ํ’ˆ์งˆ ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข…๋ฃŒ ===
806
  quality_score, need_more = self.quality_checker.evaluate_response(
807
+ creative_response, query, lang
808
  )
809
 
810
  if not need_more and quality_score > 0.85:
811
  # ํ’ˆ์งˆ์ด ์ถฉ๋ถ„ํžˆ ๋†’์œผ๋ฉด ๋ฐ”๋กœ ๋ฐ˜ํ™˜
812
+ accumulated_response = self.response_cleaner.clean_response(creative_response)
813
 
814
  if show_progress:
815
+ quality_msg = {
816
+ 'ko': f"โœ… ํ’ˆ์งˆ ์ถฉ์กฑ (์ ์ˆ˜: {quality_score:.2f})\n์กฐ๊ธฐ ์™„๋ฃŒ!\n",
817
+ 'en': f"โœ… Quality met (score: {quality_score:.2f})\nEarly completion!\n",
818
+ 'ja': f"โœ… ๅ“่ณชๆบ€่ถณ (ใ‚นใ‚ณใ‚ข: {quality_score:.2f})\nๆ—ฉๆœŸๅฎŒไบ†!\n",
819
+ 'zh': f"โœ… ่ดจ้‡ๆปก่ถณ (ๅˆ†ๆ•ฐ: {quality_score:.2f})\nๆๅ‰ๅฎŒๆˆ!\n"
820
+ }
821
+ agent_thoughts += quality_msg.get(lang, quality_msg['en'])
822
 
823
  yield accumulated_response, agent_thoughts
824
  return
825
 
826
  # === 4๋‹จ๊ณ„: ์ตœ์ข… ํ†ตํ•ฉ (์ŠคํŠธ๋ฆฌ๋ฐ) ===
827
  if show_progress:
828
+ final_msg = {
829
+ 'ko': "โœ… ์ตœ์ข… ํ†ตํ•ฉ ์ค‘...\n\n",
830
+ 'en': "โœ… Final integration in progress...\n\n",
831
+ 'ja': "โœ… ๆœ€็ต‚็ตฑๅˆไธญ...\n\n",
832
+ 'zh': "โœ… ๆœ€็ปˆๆ•ดๅˆไธญ...\n\n"
833
+ }
834
+ agent_thoughts += final_msg.get(lang, final_msg['en'])
835
  yield accumulated_response, agent_thoughts
836
 
837
+ # ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ (์–ธ์–ด๋ณ„)
838
+ final_prompt_templates = {
839
+ 'ko': f"""
840
  ์งˆ๋ฌธ: {query}
841
  ์ฐฝ์˜์„ฑ๋‹ต๋ณ€: {creative_response}
842
  ๋น„ํ‰ํ”ผ๋“œ๋ฐฑ: {critic_response}
843
  ๊ฐ๋…์ž๊ตฌ์กฐ: {supervisor_response}
844
+ ์ตœ์ข…ํ†ตํ•ฉโ†’์™„๋ฒฝ๋‹ต๋ณ€. ๋งˆํฌ๋‹ค์šด ํ—ค๋”(#, ##, ###) ์‚ฌ์šฉ ๊ธˆ์ง€.""",
845
+ 'en': f"""
846
+ Question: {query}
847
+ Creative answer: {creative_response}
848
+ Critic feedback: {critic_response}
849
+ Supervisor structure: {supervisor_response}
850
+ Final integrationโ†’perfect answer. No markdown headers (#, ##, ###).""",
851
+ 'ja': f"""
852
+ ่ณชๅ•: {query}
853
+ ๅ‰ต้€ ็š„ๅ›ž็ญ”: {creative_response}
854
+ ๆ‰น่ฉ•ใƒ•ใ‚ฃใƒผใƒ‰ใƒใƒƒใ‚ฏ: {critic_response}
855
+ ็›ฃ็ฃ่€…ๆง‹้€ : {supervisor_response}
856
+ ๆœ€็ต‚็ตฑๅˆโ†’ๅฎŒ็’งใชๅ›ž็ญ”ใ€‚ใƒžใƒผใ‚ฏใƒ€ใ‚ฆใƒณใƒ˜ใƒƒใƒ€ใƒผ๏ผˆ#ใ€##ใ€###๏ผ‰ไฝฟ็”จ็ฆๆญขใ€‚""",
857
+ 'zh': f"""
858
+ ้—ฎ้ข˜: {query}
859
+ ๅˆ›ๆ„็ญ”ๆกˆ: {creative_response}
860
+ ่ฏ„่ฎบๅ้ฆˆ: {critic_response}
861
+ ไธป็ฎก็ป“ๆž„: {supervisor_response}
862
+ ๆœ€็ปˆๆ•ดๅˆโ†’ๅฎŒ็พŽ็ญ”ๆกˆใ€‚็ฆๆญขไฝฟ็”จMarkdownๆ ‡้ข˜๏ผˆ#ใ€##ใ€###๏ผ‰ใ€‚"""
863
+ }
864
+
865
+ final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en'])
866
 
867
  final_task = self.llm.chat_stream_async(
868
  messages=[
 
875
 
876
  # ์ตœ์ข… ๋‹ต๋ณ€ ์ŠคํŠธ๋ฆฌ๋ฐ
877
  accumulated_response = ""
878
+
879
+ async for chunk in final_task:
880
  accumulated_response += chunk
881
+ # ์‹ค์‹œ๊ฐ„ ์ •๋ฆฌ
882
+ cleaned_response = self.response_cleaner.clean_response(accumulated_response)
883
+ yield cleaned_response, agent_thoughts
884
 
885
+ # ์ตœ์ข… ์ •๋ฆฌ
886
+ accumulated_response = self.response_cleaner.clean_response(accumulated_response)
 
887
 
888
+ # ์ฒ˜๋ฆฌ ์‹œ๊ฐ„ ์ถ”๊ฐ€ (์–ธ์–ด๋ณ„)
889
+ processing_time = time.time() - start_time
890
+ time_msg = {
891
+ 'ko': f"\n\n---\nโšก ์ฒ˜๋ฆฌ ์‹œ๊ฐ„: {processing_time:.1f}์ดˆ",
892
+ 'en': f"\n\n---\nโšก Processing time: {processing_time:.1f} seconds",
893
+ 'ja': f"\n\n---\nโšก ๅ‡ฆ็†ๆ™‚้–“: {processing_time:.1f}็ง’",
894
+ 'zh': f"\n\n---\nโšก ๅค„็†ๆ—ถ้—ด: {processing_time:.1f}็ง’"
895
+ }
896
+ accumulated_response += time_msg.get(lang, time_msg['en'])
897
 
898
  yield accumulated_response, agent_thoughts
899
 
900
  except Exception as e:
901
+ error_msg = {
902
+ 'ko': f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}",
903
+ 'en': f"โŒ Error occurred: {str(e)}",
904
+ 'ja': f"โŒ ใ‚จใƒฉใƒผ็™บ็”Ÿ: {str(e)}",
905
+ 'zh': f"โŒ ๅ‘็”Ÿ้”™่ฏฏ: {str(e)}"
906
+ }
907
+ yield error_msg.get(lang, error_msg['en']), agent_thoughts
908
 
909
  async def _run_critic_async(self, prompt: str) -> str:
910
+ """๋น„ํ‰์ž ๋น„๋™๊ธฐ ์‹คํ–‰ with error handling"""
911
  try:
912
  response = ""
913
  async for chunk in self.llm.chat_stream_async(
 
920
  ):
921
  response += chunk
922
  return response
923
+ except Exception as e:
924
+ # ์–ธ์–ด ๊ฐ์ง€
925
+ lang = 'ko' if '์งˆ๋ฌธ' in prompt else 'en'
926
+ error_msg = {
927
+ 'ko': "๋น„ํ‰ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜",
928
+ 'en': "Error during critic processing",
929
+ 'ja': "ๆ‰น่ฉ•ๅ‡ฆ็†ไธญใฎใ‚จใƒฉใƒผ",
930
+ 'zh': "่ฏ„่ฎบๅค„็†ไธญๅ‡บ้”™"
931
+ }
932
+ return error_msg.get(lang, error_msg['en'])
933
 
934
  def _format_search_results(self, results: List[Dict]) -> str:
935
  """๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์••์ถ• ํฌ๋งท"""
936
  if not results:
937
+ return "No search results"
938
 
939
  formatted = []
940
+ for i, r in enumerate(results[:3], 1):
941
+ title = r.get('title', '')[:50]
942
+ desc = r.get('description', '')[:100]
943
+ formatted.append(f"[{i}]{title}:{desc}")
944
 
945
  return " | ".join(formatted)
946
 
947
 
948
  # ============================================================================
949
+ # Gradio UI (์ตœ์ ํ™” ๋ฒ„์ „ - ์บ์‹ฑ ์ œ๊ฑฐ)
950
  # ============================================================================
951
 
952
  def create_optimized_gradio_interface():
953
+ """์ตœ์ ํ™”๋œ Gradio ์ธํ„ฐํŽ˜์ด์Šค (์บ์‹ฑ ์—†์Œ)"""
954
 
955
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
956
  system = SpeedOptimizedMultiAgentSystem()
 
960
  history: List[Dict],
961
  use_search: bool,
962
  show_agent_thoughts: bool,
963
+ search_count: int,
964
+ language_mode: str
965
  ):
966
+ """์ตœ์ ํ™”๋œ ์ฟผ๋ฆฌ ์ฒ˜๋ฆฌ - ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ๋ฒ„์ „"""
967
 
968
  if not message:
969
  yield history, "", ""
970
  return
971
 
972
+ # ์–ธ์–ด ์„ค์ •
973
+ if language_mode == "Auto":
974
+ lang = None # ์ž๋™ ๊ฐ์ง€
975
+ else:
976
+ lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"}
977
+ lang = lang_map.get(language_mode, None)
978
+
979
  # ๋น„๋™๊ธฐ ํ•จ์ˆ˜๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
980
  try:
981
  import nest_asyncio
982
  nest_asyncio.apply()
983
  except ImportError:
984
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
 
986
  try:
987
  # ๊ฒ€์ƒ‰ ์ˆ˜ํ–‰ (๋™๊ธฐํ™”)
988
  search_results = []
989
  search_display = ""
990
 
991
+ # ์–ธ์–ด ์ž๋™ ๊ฐ์ง€ (ํ•„์š”ํ•œ ๊ฒฝ์šฐ)
992
+ detected_lang = lang or system.language_detector.detect_language(message)
993
+
994
  if use_search:
995
  # ๊ฒ€์ƒ‰ ์ƒํƒœ ํ‘œ์‹œ
996
+ processing_msg = {
997
+ 'ko': "โšก ๊ณ ์† ์ฒ˜๋ฆฌ ์ค‘...",
998
+ 'en': "โšก High-speed processing...",
999
+ 'ja': "โšก ้ซ˜้€Ÿๅ‡ฆ็†ไธญ...",
1000
+ 'zh': "โšก ้ซ˜้€Ÿๅค„็†ไธญ..."
1001
+ }
1002
  history_with_message = history + [
1003
  {"role": "user", "content": message},
1004
+ {"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])}
1005
  ]
1006
  yield history_with_message, "", ""
1007
 
1008
  # ๋น„๋™๊ธฐ ๊ฒ€์ƒ‰์„ ๋™๊ธฐ์ ์œผ๋กœ ์‹คํ–‰
1009
+ async def search_wrapper():
1010
+ return await system.search.search_async(message, count=search_count, lang=detected_lang)
1011
+
1012
+ loop = asyncio.new_event_loop()
1013
+ asyncio.set_event_loop(loop)
1014
+ search_results = loop.run_until_complete(search_wrapper())
1015
 
1016
  if search_results:
1017
+ ref_label = {
1018
+ 'ko': "๐Ÿ“š ์ฐธ๊ณ  ์ž๋ฃŒ",
1019
+ 'en': "๐Ÿ“š References",
1020
+ 'ja': "๐Ÿ“š ๅ‚่€ƒ่ณ‡ๆ–™",
1021
+ 'zh': "๐Ÿ“š ๅ‚๏ฟฝ๏ฟฝ๏ฟฝ่ต„ๆ–™"
1022
+ }
1023
+ search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n"
1024
  for i, result in enumerate(search_results[:3], 1):
1025
  search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n"
1026
  search_display += f" {result['description'][:100]}...\n\n"
 
1028
  # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
1029
  current_history = history + [{"role": "user", "content": message}]
1030
 
1031
+ # ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ์„ ์œ„ํ•œ ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ
1032
+ async def stream_responses():
1033
+ """์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ"""
1034
  async for response, thoughts in system.parallel_process_agents(
1035
  query=message,
1036
  search_results=search_results,
1037
+ show_progress=show_agent_thoughts,
1038
+ lang=detected_lang
1039
  ):
1040
+ yield response, thoughts
 
1041
 
1042
+ # ์ƒˆ ์ด๋ฒคํŠธ ๋ฃจํ”„์—์„œ ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ
1043
+ loop = asyncio.new_event_loop()
1044
+ asyncio.set_event_loop(loop)
1045
 
1046
+ # ๋น„๋™๊ธฐ ์ œ๋„ˆ๋ ˆ์ดํ„ฐ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ์ˆœํšŒ
1047
+ gen = stream_responses()
1048
+
1049
+ while True:
1050
+ try:
1051
+ # ๋‹ค์Œ ํ•ญ๋ชฉ ๊ฐ€์ ธ์˜ค๊ธฐ
1052
+ task = asyncio.ensure_future(gen.__anext__(), loop=loop)
1053
+ response, thoughts = loop.run_until_complete(task)
1054
+
1055
+ # ์‹ค์‹œ๊ฐ„ ์—…๋ฐ์ดํŠธ
1056
+ updated_history = current_history + [
1057
+ {"role": "assistant", "content": response}
1058
+ ]
1059
+ yield updated_history, thoughts, search_display
1060
+
1061
+ except StopAsyncIteration:
1062
+ break
1063
 
1064
  except Exception as e:
1065
  error_history = history + [
1066
  {"role": "user", "content": message},
1067
+ {"role": "assistant", "content": f"โŒ Error: {str(e)}"}
1068
  ]
1069
  yield error_history, "", ""
1070
+ finally:
1071
+ # ๋ฃจํ”„ ์ •๋ฆฌ
1072
+ try:
1073
+ loop.close()
1074
+ except:
1075
+ pass
1076
 
1077
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค
1078
  with gr.Blocks(
1079
+ title="โšก Speed-Optimized Multi-Agent System (No Cache)",
1080
  theme=gr.themes.Soft(),
1081
  css="""
1082
  .gradio-container {
 
1086
  """
1087
  ) as demo:
1088
  gr.Markdown("""
1089
+ # โšก Enhanced Multi-Agent RAG System (์บ์‹ฑ ์ œ๊ฑฐ ๋ฒ„์ „)
1090
+ **Complex questions processed within 5-8 seconds | Multi-language support**
1091
+
1092
+ **Optimization Features:**
1093
+ - ๐Ÿš€ Parallel Processing: Concurrent agent execution
1094
+ - โšก Stream Buffering: Network optimization
1095
+ - ๐ŸŽฏ Early Termination: Complete immediately when quality is met
1096
+ - ๐ŸŒ Multi-language: Auto-detect Korean/English/Japanese/Chinese
1097
+ - โŒ **Caching Disabled**: ์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ
1098
  """)
1099
 
1100
  with gr.Row():
1101
  with gr.Column(scale=3):
1102
  chatbot = gr.Chatbot(
1103
  height=500,
1104
+ label="๐Ÿ’ฌ Chat",
1105
  type="messages"
1106
  )
1107
 
1108
  msg = gr.Textbox(
1109
+ label="Enter complex question",
1110
+ placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...",
1111
  lines=3
1112
  )
1113
 
1114
  with gr.Row():
1115
+ submit = gr.Button("โšก High-Speed Process", variant="primary")
1116
+ clear = gr.Button("๐Ÿ”„ Reset")
1117
 
1118
+ with gr.Accordion("๐Ÿค– Agent Processing", open=False):
1119
  agent_thoughts = gr.Markdown()
1120
 
1121
+ with gr.Accordion("๐Ÿ“š Search Sources", open=False):
1122
  search_sources = gr.Markdown()
1123
 
1124
  with gr.Column(scale=1):
1125
+ gr.Markdown("**โš™๏ธ Settings**")
1126
+
1127
+ language_mode = gr.Radio(
1128
+ choices=["Auto", "Korean", "English", "Japanese", "Chinese"],
1129
+ value="Auto",
1130
+ label="๐ŸŒ Language Mode"
1131
+ )
1132
 
1133
  use_search = gr.Checkbox(
1134
+ label="๐Ÿ” Use Web Search",
1135
  value=True
1136
  )
1137
 
1138
  show_agent_thoughts = gr.Checkbox(
1139
+ label="๐Ÿง  Show Processing",
1140
  value=True
1141
  )
1142
 
 
1145
  maximum=10,
1146
  value=5,
1147
  step=1,
1148
+ label="Search Results Count"
1149
  )
1150
 
1151
  gr.Markdown("""
1152
+ **โšก Optimization Status**
1153
+
1154
+ **Active Optimizations:**
1155
+ - โœ… Parallel Processing
1156
+ - โŒ ~~Smart Caching~~ (์ œ๊ฑฐ๋จ)
1157
+ - โœ… Buffer Streaming
1158
+ - โœ… Early Termination
1159
+ - โœ… Compressed Prompts
1160
+ - โœ… Multi-language Support
1161
+ - โœ… Error Recovery
1162
+
1163
+ **Expected Processing Time:**
1164
+ - Simple Query: 3-5 seconds
1165
+ - Complex Query: 5-8 seconds
1166
+ - Very Complex: 8-12 seconds
1167
  """)
1168
 
1169
+ # ๋ณต์žกํ•œ ์งˆ๋ฌธ ์˜ˆ์ œ (๋‹ค๊ตญ์–ด)
1170
  gr.Examples(
1171
  examples=[
1172
+ # Korean
1173
  "AI ๊ธฐ์ˆ ์ด ํ–ฅํ›„ 10๋…„๊ฐ„ ํ•œ๊ตญ ๊ฒฝ์ œ์— ๋ฏธ์น  ์˜ํ–ฅ์„ ๋‹ค๊ฐ๋„๋กœ ๋ถ„์„ํ•˜๊ณ  ๋Œ€์‘ ์ „๋žต์„ ์ œ์‹œํ•ด์ค˜",
1174
  "์Šคํƒ€ํŠธ์—…์ด ๋Œ€๊ธฐ์—…๊ณผ ๊ฒฝ์Ÿํ•˜๊ธฐ ์œ„ํ•œ ํ˜์‹ ์ ์ธ ์ „๋žต์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ˆ˜๋ฆฝํ•ด์ค˜",
1175
+ # English
1176
+ "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
1177
+ "Design 5 innovative business models for climate change mitigation with practical implementation details",
1178
+ # Japanese
1179
+ "ใƒกใ‚ฟใƒใƒผใ‚นๆ™‚ไปฃใฎๆ•™่‚ฒ้ฉๆ–ฐๆ–นๆกˆใ‚’ๅฎŸ่ฃ…ๅฏ่ƒฝใชใƒฌใƒ™ใƒซใงๆๆกˆใ—ใฆใใ ใ•ใ„",
1180
+ # Chinese
1181
+ "ๅˆ†ๆžไบบๅทฅๆ™บ่ƒฝๅฏนๆœชๆฅๅๅนดๅ…จ็ƒ็ปๆตŽ็š„ๅฝฑๅ“ๅนถๆๅ‡บๅบ”ๅฏน็ญ–็•ฅ"
1182
  ],
1183
  inputs=msg
1184
  )
 
1186
  # ์ด๋ฒคํŠธ ๋ฐ”์ธ๋”ฉ
1187
  submit.click(
1188
  process_query_optimized,
1189
+ inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1190
  outputs=[chatbot, agent_thoughts, search_sources]
1191
  ).then(
1192
  lambda: "",
 
1196
 
1197
  msg.submit(
1198
  process_query_optimized,
1199
+ inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode],
1200
  outputs=[chatbot, agent_thoughts, search_sources]
1201
  ).then(
1202
  lambda: "",
 
1220
  if __name__ == "__main__":
1221
  print("""
1222
  โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
1223
+ โ•‘ โšก Speed-Optimized Multi-Agent System (No Cache) โšก โ•‘
1224
  โ•‘ โ•‘
1225
+ โ•‘ High-speed AI system processing complex questions โ•‘
1226
  โ•‘ โ•‘
1227
+ โ•‘ Features: โ•‘
1228
+ โ•‘ โ€ข Multi-language support (KO/EN/JA/ZH) โ•‘
1229
+ โ•‘ โ€ข Improved error recovery โ•‘
1230
+ โ•‘ โ€ข NO CACHING (์บ์‹ฑ ๊ธฐ๋Šฅ ์ œ๊ฑฐ๋จ) โ•‘
1231
+ โ•‘ โ€ข Adaptive stream buffering โ•‘
1232
+ โ•‘ โ€ข Response cleaning & formatting โ•‘
1233
  โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
1234
  """)
1235
 
1236
  # API ํ‚ค ํ™•์ธ
1237
  if not os.getenv("FIREWORKS_API_KEY"):
1238
+ print("\nโš ๏ธ FIREWORKS_API_KEY is not set.")
1239
 
1240
  if not os.getenv("BRAVE_SEARCH_API_KEY"):
1241
+ print("\nโš ๏ธ BRAVE_SEARCH_API_KEY is not set.")
1242
 
1243
  # Gradio ์•ฑ ์‹คํ–‰
1244
  demo = create_optimized_gradio_interface()
 
1246
  is_hf_spaces = os.getenv("SPACE_ID") is not None
1247
 
1248
  if is_hf_spaces:
1249
+ print("\n๐Ÿค— Running in optimized mode on Hugging Face Spaces (No Cache)...")
1250
  demo.launch(server_name="0.0.0.0", server_port=7860)
1251
  else:
1252
+ print("\n๐Ÿ’ป Running in optimized mode on local environment (No Cache)...")
1253
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)