ikram98ai committed
Commit 4eadf6e · 1 Parent(s): 552e91a

adding docstring for mcp and renaming function

Files changed (1)
  1. src/app.py +75 -35
src/app.py CHANGED
@@ -4,28 +4,35 @@ import yaml
 import sys
 from pathlib import Path
 from dataclasses import asdict
-
+from typing import Optional, List, Literal
 # Ensure project root is on sys.path when running this module as a script.
 _project_root = Path(__file__).resolve().parents[1]
 if str(_project_root) not in sys.path:
     sys.path.insert(0, str(_project_root))
 
-
 from src.core.ingest import ingest
 from src.core.retrieval import generate, retrieval
 from src.core.index import MetaData
 from src.core.synthetic_data import EVAL_QUERIES, SYNTHETIC_DOCUMENTS
-from src.core.eval import (
-    run_full_evaluation,
-    save_results,
-    generate_summary_report,
-    setup_test_data,
-)
+from src.core.eval import run_full_evaluation, save_results
+from src.core.eval import generate_summary_report, setup_test_data
 
 
-def process_files(files, index_name, lang, domain, section, topic, doc_type):
+def ingest_files(files:List[str], index_name:str, lang:Literal["en", "ja"], domain:Optional[str], section:Optional[str], topic:Optional[str], doc_type:Optional[Literal["manual", "policy", "faq"]]):
     """
-    Loading, chunking, embedding, and storing in a vector DB.
+    Load, chunk, embed, and store files in a vector database.
+
+    Args:
+        files (list): A list of files to process.
+        index_name (str): The name of the index to store the files in.
+        lang (str): The language of the files.
+        domain (str): The domain of the files.
+        section (str): The section of the files.
+        topic (str): The topic of the files.
+        doc_type (str): The document type of the files.
+
+    Returns:
+        dict: A dictionary containing the status of the ingestion.
     """
     print("files uploaded", files)
     if not files:
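The hunk above renames `process_files` to `ingest_files` and gives it typed parameters plus a Google-style docstring, which is what Gradio reads when it publishes the function as an API endpoint or MCP tool. A minimal sketch of calling it directly; the file paths and index name are hypothetical, and a vector store reachable by `src.core.ingest.ingest` is assumed:

```python
# Sketch only: hypothetical paths and index name; assumes the vector store
# behind src.core.ingest.ingest is reachable from this environment.
from src.app import ingest_files

result = ingest_files(
    files=["docs/handbook.pdf", "docs/faq.md"],  # hypothetical local files
    index_name="support_docs",                   # hypothetical index
    lang="en",
    domain="hr",
    section=None,
    topic=None,
    doc_type="manual",
)
print(result)  # {"status": "success", "message": ...} or an error payload
```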
@@ -44,7 +51,7 @@ def process_files(files, index_name, lang, domain, section, topic, doc_type):
     result = ingest(index_name, filter_data, files)
     return {"status": "success", "message": result}
 
-def add_metric(doc):
+def _add_metric(doc):
     return (f"\n### source: {doc.metadata.get('source_name','None')}"
             f"\n### similarity_score: {doc.metadata.get('similarity_score','None'):.4f}"
     )
@@ -64,7 +71,7 @@ def _rag_query(
     ret_start_time = time.time()
 
     docs = retrieval(question, index_name, active_filters)
-    retrieval_results = [doc.page_content + add_metric(doc) for doc in docs]
+    retrieval_results = [doc.page_content + _add_metric(doc) for doc in docs]
     snippets_md = "\n\n---\n\n".join(retrieval_results)
 
     ret_end_time = time.time()
@@ -82,11 +89,21 @@ def _rag_query(
     return answer, snippets_md
 
 
-def run_rag_comparison(question, index_name, lang, domain, section, topic, doc_type):
+def run_rag_comparison(question:str, index_name:str, lang:Literal["en", "ja"], domain:Optional[str], section:Optional[str], topic:Optional[str], doc_type:Optional[Literal["manual", "policy", "faq"]]):
     """
-    Function to Runs two RAG simulations side-by-side.
-    This version is a generator: it yields a loading state first so the UI shows
-    a loading animation/text immediately, then yields final results.
+    Run two RAG simulations side-by-side for comparison.
+
+    Args:
+        question (str): The question to ask the RAG models.
+        index_name (str): The name of the index to query.
+        lang (str): The language of the query.
+        domain (str): The domain to filter by.
+        section (str): The section to filter by.
+        topic (str): The topic to filter by.
+        doc_type (str): The document type to filter by.
+
+    Returns:
+        tuple: A tuple containing the answers and snippets for both base and hierarchical RAG.
     """
     if not index_name:
         error_msg = "Please select an index to query."
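Despite the `Returns:` section in the new docstring, `run_rag_comparison` is still a generator (the `yield` at new line 137 is unchanged), so a direct caller iterates over the yielded 4-tuples and keeps the last one as the final result. A sketch with a hypothetical question and index name:

```python
# Sketch only: question and index name are hypothetical.
from src.app import run_rag_comparison

stream = run_rag_comparison(
    "How do I reset my password?",  # question
    "support_docs",                 # index_name (hypothetical)
    "en",                           # lang
    None, None, None, None,         # domain, section, topic, doc_type
)

final = None
for base_answer, base_snippets, hier_answer, hier_snippets in stream:
    # The first yield is the loading state; the last yield holds the final results.
    final = (base_answer, base_snippets, hier_answer, hier_snippets)

base_answer, _, hier_answer, _ = final
print(base_answer)
print(hier_answer)
```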
@@ -120,7 +137,7 @@ def run_rag_comparison(question, index_name, lang, domain, section, topic, doc_t
     yield base_answer, base_snippets, hier_answer, hier_snippets
 
 
-def load_yaml_config(yaml_file):
+def _load_yaml_config(yaml_file):
     """
     Parses the uploaded YAML file and returns the config dictionary.
     """
@@ -142,10 +159,10 @@ def load_yaml_config(yaml_file):
         gr.Warning(f"Failed to load YAML config: {e}")
         return None
 
-def update_metadata_for_index_ingest(index, config):
+def _update_metadata_for_index_ingest(index, config):
     """
     Updates the Ingestion filter dropdowns based on the selected index and loaded config.
-    """
+    """
     if config is None or index is None:
         empty_update = gr.update(choices=[], value=None)
         return empty_update, empty_update, empty_update
@@ -162,7 +179,7 @@ def update_metadata_for_index_ingest(index, config):
         gr.update(choices=topics, value=topics[0] if topics else None)
     )
 
-def update_filters_for_index_chat(index, config):
+def _update_filters_for_index_chat(index, config):
     """
     Updates the Chat filter dropdowns based on the selected index and loaded config.
     """
@@ -183,8 +200,16 @@ def update_filters_for_index_chat(index, config):
     )
 
 
-def setup_synthetic_data(collections):
-    """Setup synthetic test data for evaluation"""
+def setup_synthetic_data(collections: List[str]):
+    """
+    Set up synthetic test data for evaluation.
+
+    Args:
+        collections (list): A list of collections to set up synthetic data for.
+
+    Returns:
+        str: A message indicating the status of the data setup.
+    """
     if not collections:
         return "⚠️ Please select at least one collection"
 
@@ -195,8 +220,18 @@ def setup_synthetic_data(collections):
         return f"❌ Error setting up test data: {str(e)}"
 
 
-def run_evaluation_batch(collections, output_dir, progress=gr.Progress(track_tqdm=True)):
-    """Run full batch evaluation"""
+def run_evaluation_batch(collections:List[str], output_dir:str, progress=gr.Progress(track_tqdm=True)):
+    """
+    Run a full batch evaluation.
+
+    Args:
+        collections (list): A list of collections to evaluate.
+        output_dir (str): The directory to save the evaluation reports in.
+        progress (gradio.Progress): A Gradio progress object to track the evaluation progress.
+
+    Returns:
+        tuple: A tuple containing the evaluation status, summary statistics, and file paths for the generated reports.
+    """
     if not collections:
         return (
             "⚠️ Please select at least one collection",
@@ -329,7 +364,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
     ingest_output = gr.JSON(label="Ingestion Status and Sample Metadata")
 
     ingest_button.click(
-        fn=process_files,
+        fn=ingest_files,
         inputs=[
             file_uploader,
             index_select_ingest,
@@ -516,31 +551,36 @@ with gr.Blocks(theme=gr.themes.Soft(), title="RAG Evaluation System") as demo:
 
     # 1. When YAML is uploaded, store its content in config_state
     yaml_uploader.upload(
-        fn=load_yaml_config,
+        fn=_load_yaml_config,
         inputs=[yaml_uploader],
-        outputs=[config_state]
+        outputs=[config_state],
+        show_api=False
     ).then(
-        fn=update_metadata_for_index_ingest,
+        fn=_update_metadata_for_index_ingest,
         inputs=[index_select_ingest, config_state],
-        outputs=[domain_select_ingest, section_select_ingest, topic_select_ingest]
+        outputs=[domain_select_ingest, section_select_ingest, topic_select_ingest],
+        show_api=False
     ).then(
-        fn=update_filters_for_index_chat,
+        fn=_update_filters_for_index_chat,
         inputs=[index_select_chat, config_state],
-        outputs=[domain_select, section_select, topic_select]
+        outputs=[domain_select, section_select, topic_select],
+        show_api=False
     )
 
     # 2. When the Ingest index changes, update its metadata
     index_select_ingest.change(
-        fn=update_metadata_for_index_ingest,
+        fn=_update_metadata_for_index_ingest,
         inputs=[index_select_ingest, config_state],
-        outputs=[domain_select_ingest, section_select_ingest, topic_select_ingest]
+        outputs=[domain_select_ingest, section_select_ingest, topic_select_ingest],
+        show_api=False
     )
 
     # 3. When the Chat index changes, update its filters
     index_select_chat.change(
-        fn=update_filters_for_index_chat,
+        fn=_update_filters_for_index_chat,
         inputs=[index_select_chat, config_state],
-        outputs=[domain_select, section_select, topic_select]
+        outputs=[domain_select, section_select, topic_select],
+        show_api=False
     )
 
 
 