idiom-finder

Sleeping

App Files Community

Mel Seto committed on Sep 25

Commit

c78b0c8

1 Parent(s): a40ed0a

add RAG option to app

Browse files

Files changed (2) hide show

requirements.txt +0 -133
src/app.py +34 -6

requirements.txt CHANGED Viewed

@@ -1,9 +1,5 @@
 # This file was autogenerated by uv via the following command:
-<<<<<<< HEAD
-#    uv pip compile pyproject.toml -o requirements.txt
-=======
 #    uv export --no-hashes --format requirements-txt
->>>>>>> 660f6fb (organizing files into src folder etc.)
 aiofiles==24.1.0
     # via gradio
 aiohappyeyeballs==2.6.1
@@ -20,14 +16,6 @@ anyio==4.10.0
     #   gradio
     #   httpx
     #   starlette
-<<<<<<< HEAD
-attrs==25.3.0
-    # via aiohttp
-brotli==1.1.0
-    # via gradio
-cerebras-cloud-sdk==1.50.1
-    # via chinese-idioms (pyproject.toml)
-=======
     #   watchfiles
 astroid==3.3.11
     # via pylint
@@ -40,7 +28,6 @@ brotli==1.1.0
     # via gradio
 cerebras-cloud-sdk==1.50.1
     # via chinese-idioms
->>>>>>> 660f6fb (organizing files into src folder etc.)
 certifi==2025.8.3
     # via
     #   httpcore
@@ -50,12 +37,6 @@ charset-normalizer==3.4.3
     # via requests
 click==8.2.1
     # via
-<<<<<<< HEAD
-    #   typer
-    #   uvicorn
-datasets==4.1.0
-    # via chinese-idioms (pyproject.toml)
-=======
     #   black
     #   typer
     #   uvicorn
@@ -67,21 +48,14 @@ colorama==0.4.6 ; sys_platform == 'win32'
     #   tqdm
 datasets==4.1.0
     # via chinese-idioms
->>>>>>> 660f6fb (organizing files into src folder etc.)
 dill==0.4.0
     # via
     #   datasets
     #   multiprocess
-<<<<<<< HEAD
-distro==1.9.0
-    # via cerebras-cloud-sdk
-fastapi==0.116.2
-=======
     #   pylint
 distro==1.9.0
     # via cerebras-cloud-sdk
 fastapi==0.116.1
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via gradio
 ffmpy==0.6.1
     # via gradio
@@ -89,34 +63,21 @@ filelock==3.19.1
     # via
     #   datasets
     #   huggingface-hub
-<<<<<<< HEAD
-=======
     #   torch
     #   transformers
->>>>>>> 660f6fb (organizing files into src folder etc.)
 frozenlist==1.7.0
     # via
     #   aiohttp
     #   aiosignal
-<<<<<<< HEAD
-fsspec==2025.9.0
-=======
 fsspec==2025.7.0
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via
     #   datasets
     #   gradio-client
     #   huggingface-hub
-<<<<<<< HEAD
-gradio==5.46.0
-    # via chinese-idioms (pyproject.toml)
-gradio-client==1.13.0
-=======
     #   torch
 gradio==5.44.0
     # via chinese-idioms
 gradio-client==1.12.1
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via gradio
 groovy==0.1.2
     # via gradio
@@ -124,11 +85,7 @@ h11==0.16.0
     # via
     #   httpcore
     #   uvicorn
-<<<<<<< HEAD
-hf-xet==1.1.10
-=======
 hf-xet==1.1.8 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via huggingface-hub
 httpcore==1.0.9
     # via httpx
@@ -139,32 +96,20 @@ httpx==0.28.1
     #   gradio-client
     #   ollama
     #   safehttpx
-<<<<<<< HEAD
-huggingface-hub==0.35.0
-=======
 huggingface-hub==0.34.4
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via
     #   datasets
     #   gradio
     #   gradio-client
-<<<<<<< HEAD
-=======
     #   sentence-transformers
     #   tokenizers
     #   transformers
->>>>>>> 660f6fb (organizing files into src folder etc.)
 idna==3.10
     # via
     #   anyio
     #   httpx
     #   requests
     #   yarl
-<<<<<<< HEAD
-jinja2==3.1.6
-    # via gradio
-markdown-it-py==4.0.0
-=======
 iniconfig==2.1.0
     # via pytest
 isort==6.0.1
@@ -176,38 +121,23 @@ jinja2==3.1.6
 joblib==1.5.2
     # via scikit-learn
 markdown-it-py==4.0.0 ; sys_platform != 'emscripten'
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via rich
 markupsafe==3.0.2
     # via
     #   gradio
     #   jinja2
-<<<<<<< HEAD
-mdurl==0.1.2
-    # via markdown-it-py
-=======
 mccabe==0.7.0
     # via pylint
 mdurl==0.1.2 ; sys_platform != 'emscripten'
     # via markdown-it-py
 mpmath==1.3.0
     # via sympy
->>>>>>> 660f6fb (organizing files into src folder etc.)
 multidict==6.6.4
     # via
     #   aiohttp
     #   yarl
 multiprocess==0.70.16
     # via datasets
-<<<<<<< HEAD
-numpy >= 2.0, < 3.0
-    # via
-    #   datasets
-    #   gradio
-    #   pandas
-ollama==0.5.4
-    # via chinese-idioms (pyproject.toml)
-=======
 mypy-extensions==1.1.0
     # via black
 networkx==3.5
@@ -260,32 +190,21 @@ nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'li
     # via torch
 ollama==0.5.3
     # via chinese-idioms
->>>>>>> 660f6fb (organizing files into src folder etc.)
 orjson==3.11.3
     # via gradio
 packaging==25.0
     # via
-<<<<<<< HEAD
-=======
     #   black
->>>>>>> 660f6fb (organizing files into src folder etc.)
     #   datasets
     #   gradio
     #   gradio-client
     #   huggingface-hub
-<<<<<<< HEAD
-=======
     #   pytest
     #   transformers
->>>>>>> 660f6fb (organizing files into src folder etc.)
 pandas==2.3.2
     # via
     #   datasets
     #   gradio
-<<<<<<< HEAD
-pillow==11.3.0
-    # via gradio
-=======
 pathspec==0.12.1
     # via black
 pillow==11.3.0
@@ -298,7 +217,6 @@ platformdirs==4.4.0
     #   pylint
 pluggy==1.6.0
     # via pytest
->>>>>>> 660f6fb (organizing files into src folder etc.)
 propcache==0.3.2
     # via
     #   aiohttp
@@ -306,13 +224,8 @@ propcache==0.3.2
 pyarrow==21.0.0
     # via datasets
 pycccedict==1.2.0
-<<<<<<< HEAD
-    # via chinese-idioms (pyproject.toml)
-pydantic==2.11.9
-=======
     # via chinese-idioms
 pydantic==2.11.7
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via
     #   cerebras-cloud-sdk
     #   fastapi
@@ -323,13 +236,6 @@ pydantic-core==2.33.2
 pydub==0.25.1
     # via gradio
 pygments==2.19.2
-<<<<<<< HEAD
-    # via rich
-pypinyin==0.55.0
-    # via chinese-idioms (pyproject.toml)
-python-dateutil==2.9.0.post0
-    # via pandas
-=======
     # via
     #   pytest
     #   rich
@@ -340,7 +246,6 @@ pytest==8.4.2
 python-dateutil==2.9.0.post0
     # via pandas
 python-dotenv==1.1.1
->>>>>>> 660f6fb (organizing files into src folder etc.)
 python-multipart==0.0.20
     # via gradio
 pytz==2025.2
@@ -350,27 +255,13 @@ pyyaml==6.0.2
     #   datasets
     #   gradio
     #   huggingface-hub
-<<<<<<< HEAD
-=======
     #   transformers
 regex==2025.9.18
     # via transformers
->>>>>>> 660f6fb (organizing files into src folder etc.)
 requests==2.32.5
     # via
     #   datasets
     #   huggingface-hub
-<<<<<<< HEAD
-rich==14.1.0
-    # via typer
-ruff==0.13.0
-    # via gradio
-safehttpx==0.1.6
-    # via gradio
-semantic-version==2.10.0
-    # via gradio
-shellingham==1.5.4
-=======
     #   transformers
 rich==14.1.0 ; sys_platform != 'emscripten'
     # via typer
@@ -395,7 +286,6 @@ setuptools==80.9.0
     #   torch
     #   triton
 shellingham==1.5.4 ; sys_platform != 'emscripten'
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via typer
 six==1.17.0
     # via python-dateutil
@@ -403,14 +293,6 @@ sniffio==1.3.1
     # via
     #   anyio
     #   cerebras-cloud-sdk
-<<<<<<< HEAD
-starlette==0.48.0
-    # via
-    #   fastapi
-    #   gradio
-tomlkit==0.13.3
-    # via gradio
-=======
 starlette==0.47.3
     # via
     #   fastapi
@@ -427,14 +309,10 @@ tomlkit==0.13.3
     #   pylint
 torch==2.8.0
     # via sentence-transformers
->>>>>>> 660f6fb (organizing files into src folder etc.)
 tqdm==4.67.1
     # via
     #   datasets
     #   huggingface-hub
-<<<<<<< HEAD
-typer==0.17.4
-=======
     #   sentence-transformers
     #   transformers
 transformers==4.56.2
@@ -442,7 +320,6 @@ transformers==4.56.2
 triton==3.4.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
     # via torch
 typer==0.16.1 ; sys_platform != 'emscripten'
->>>>>>> 660f6fb (organizing files into src folder etc.)
     # via gradio
 typing-extensions==4.15.0
     # via
@@ -455,13 +332,9 @@ typing-extensions==4.15.0
     #   huggingface-hub
     #   pydantic
     #   pydantic-core
-<<<<<<< HEAD
-    #   starlette
-=======
     #   sentence-transformers
     #   starlette
     #   torch
->>>>>>> 660f6fb (organizing files into src folder etc.)
     #   typer
     #   typing-inspection
 typing-inspection==0.4.1
@@ -469,18 +342,12 @@ typing-inspection==0.4.1
 tzdata==2025.2
     # via pandas
 urllib3==2.5.0
-<<<<<<< HEAD
-    # via requests
-uvicorn==0.35.0
-    # via gradio
-=======
     # via
     #   gradio
     #   requests
 uvicorn==0.35.0 ; sys_platform != 'emscripten'
     # via gradio
 watchfiles==1.1.0
->>>>>>> 660f6fb (organizing files into src folder etc.)
 websockets==15.0.1
     # via gradio-client
 xxhash==3.5.0

 # This file was autogenerated by uv via the following command:
 #    uv export --no-hashes --format requirements-txt
 aiofiles==24.1.0
     # via gradio
 aiohappyeyeballs==2.6.1
     #   gradio
     #   httpx
     #   starlette
     #   watchfiles
 astroid==3.3.11
     # via pylint
     # via gradio
 cerebras-cloud-sdk==1.50.1
     # via chinese-idioms
 certifi==2025.8.3
     # via
     #   httpcore
     # via requests
 click==8.2.1
     # via
     #   black
     #   typer
     #   uvicorn
     #   tqdm
 datasets==4.1.0
     # via chinese-idioms
 dill==0.4.0
     # via
     #   datasets
     #   multiprocess
     #   pylint
 distro==1.9.0
     # via cerebras-cloud-sdk
 fastapi==0.116.1
     # via gradio
 ffmpy==0.6.1
     # via gradio
     # via
     #   datasets
     #   huggingface-hub
     #   torch
     #   transformers
 frozenlist==1.7.0
     # via
     #   aiohttp
     #   aiosignal
 fsspec==2025.7.0
     # via
     #   datasets
     #   gradio-client
     #   huggingface-hub
     #   torch
 gradio==5.44.0
     # via chinese-idioms
 gradio-client==1.12.1
     # via gradio
 groovy==0.1.2
     # via gradio
     # via
     #   httpcore
     #   uvicorn
 hf-xet==1.1.8 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
     # via huggingface-hub
 httpcore==1.0.9
     # via httpx
     #   gradio-client
     #   ollama
     #   safehttpx
 huggingface-hub==0.34.4
     # via
     #   datasets
     #   gradio
     #   gradio-client
     #   sentence-transformers
     #   tokenizers
     #   transformers
 idna==3.10
     # via
     #   anyio
     #   httpx
     #   requests
     #   yarl
 iniconfig==2.1.0
     # via pytest
 isort==6.0.1
 joblib==1.5.2
     # via scikit-learn
 markdown-it-py==4.0.0 ; sys_platform != 'emscripten'
     # via rich
 markupsafe==3.0.2
     # via
     #   gradio
     #   jinja2
 mccabe==0.7.0
     # via pylint
 mdurl==0.1.2 ; sys_platform != 'emscripten'
     # via markdown-it-py
 mpmath==1.3.0
     # via sympy
 multidict==6.6.4
     # via
     #   aiohttp
     #   yarl
 multiprocess==0.70.16
     # via datasets
 mypy-extensions==1.1.0
     # via black
 networkx==3.5
     # via torch
 ollama==0.5.3
     # via chinese-idioms
 orjson==3.11.3
     # via gradio
 packaging==25.0
     # via
     #   black
     #   datasets
     #   gradio
     #   gradio-client
     #   huggingface-hub
     #   pytest
     #   transformers
 pandas==2.3.2
     # via
     #   datasets
     #   gradio
 pathspec==0.12.1
     # via black
 pillow==11.3.0
     #   pylint
 pluggy==1.6.0
     # via pytest
 propcache==0.3.2
     # via
     #   aiohttp
 pyarrow==21.0.0
     # via datasets
 pycccedict==1.2.0
     # via chinese-idioms
 pydantic==2.11.7
     # via
     #   cerebras-cloud-sdk
     #   fastapi
 pydub==0.25.1
     # via gradio
 pygments==2.19.2
     # via
     #   pytest
     #   rich
 python-dateutil==2.9.0.post0
     # via pandas
 python-dotenv==1.1.1
 python-multipart==0.0.20
     # via gradio
 pytz==2025.2
     #   datasets
     #   gradio
     #   huggingface-hub
     #   transformers
 regex==2025.9.18
     # via transformers
 requests==2.32.5
     # via
     #   datasets
     #   huggingface-hub
     #   transformers
 rich==14.1.0 ; sys_platform != 'emscripten'
     # via typer
     #   torch
     #   triton
 shellingham==1.5.4 ; sys_platform != 'emscripten'
     # via typer
 six==1.17.0
     # via python-dateutil
     # via
     #   anyio
     #   cerebras-cloud-sdk
 starlette==0.47.3
     # via
     #   fastapi
     #   pylint
 torch==2.8.0
     # via sentence-transformers
 tqdm==4.67.1
     # via
     #   datasets
     #   huggingface-hub
     #   sentence-transformers
     #   transformers
 transformers==4.56.2
 triton==3.4.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
     # via torch
 typer==0.16.1 ; sys_platform != 'emscripten'
     # via gradio
 typing-extensions==4.15.0
     # via
     #   huggingface-hub
     #   pydantic
     #   pydantic-core
     #   sentence-transformers
     #   starlette
     #   torch
     #   typer
     #   typing-inspection
 typing-inspection==0.4.1
 tzdata==2025.2
     # via pandas
 urllib3==2.5.0
     # via
     #   gradio
     #   requests
 uvicorn==0.35.0 ; sys_platform != 'emscripten'
     # via gradio
 watchfiles==1.1.0
 websockets==15.0.1
     # via gradio-client
 xxhash==3.5.0

src/app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import gradio as gr
 from cerebras.cloud.sdk import Cerebras
 from dotenv import load_dotenv
 from utils.utils import get_pinyin
 # ======================
@@ -92,11 +93,30 @@ Answer:"""
 # ======================
 # UI Wrapper
 # ======================
-def update_ui(situation):
-    if USE_MOCK:
-        idiom, explanation = generate_idiom_mock()
     else:
-        idiom, explanation = generate_idiom(situation)
     return (
         f"<div class='idiom-output'>{idiom}</div>",
@@ -108,9 +128,9 @@ def update_ui(situation):
 # Launch app
 # ======================
 def launch_app():
     with gr.Blocks(css="style.css") as demo:
         gr.Markdown("# 🎋 Chinese Idiom Finder")
         with gr.Row():
             with gr.Column():
                 situation = gr.Textbox(
@@ -118,6 +138,11 @@ def launch_app():
                     lines=2,
                     placeholder="e.g., When facing a big challenge",
                 )
                 generate_btn = gr.Button("✨ Find Idiom")
                 # ✅ Example situations
@@ -138,9 +163,12 @@ def launch_app():
         # pylint: disable=no-member
         generate_btn.click(
-            fn=update_ui, inputs=situation, outputs=[idiom_output, explanation_output]
         )
     demo.launch()

 from cerebras.cloud.sdk import Cerebras
 from dotenv import load_dotenv
+from retrieval.retriever import retrieve_idiom
 from utils.utils import get_pinyin
 # ======================
 # ======================
 # UI Wrapper
 # ======================
+def update_ui(situation, mode):
+    if mode == "LLM":
+        if USE_MOCK:
+            idiom, explanation = generate_idiom_mock()
+        else:
+            idiom, explanation = generate_idiom(situation)
+    elif mode == "RAG":
+        top_idioms = retrieve_idiom(situation, top_k=3)
+        formatted_idioms = []
+        for idiom_entry in top_idioms:
+            # Split "<Chinese>: <English>" format
+            if ": " in idiom_entry:
+                chinese, english = idiom_entry.split(": ", 1)
+            else:
+                chinese, english = idiom_entry, ""
+            pinyin_text = get_pinyin(chinese)
+            formatted_idioms.append(f"<div class='idiom-entry'><b>{chinese}</b><br>{pinyin_text}<br>{english}</div>")
+        # Combine all entries with horizontal separators
+        idiom = "<hr>".join(formatted_idioms)
+        explanation = "Retrieved using embeddings (RAG)."
     else:
+        idiom = "Unknown mode"
+        explanation = ""
     return (
         f"<div class='idiom-output'>{idiom}</div>",
 # Launch app
 # ======================
 def launch_app():
     with gr.Blocks(css="style.css") as demo:
         gr.Markdown("# 🎋 Chinese Idiom Finder")
         with gr.Row():
             with gr.Column():
                 situation = gr.Textbox(
                     lines=2,
                     placeholder="e.g., When facing a big challenge",
                 )
+                mode_dropdown = gr.Dropdown(
+                    ["LLM", "RAG"],
+                    label="Mode",
+                    value="LLM",
+                )
                 generate_btn = gr.Button("✨ Find Idiom")
                 # ✅ Example situations
         # pylint: disable=no-member
         generate_btn.click(
+            fn=update_ui,
+            inputs=[situation, mode_dropdown],
+            outputs=[idiom_output, explanation_output],
         )
     demo.launch()