Mel Seto committed
Commit 021749e · 1 Parent(s): c1e01a2

add traditional character idiom

Files changed (3):
  1. pyproject.toml +1 -0
  2. src/app.py +27 -10
  3. uv.lock +13 -0
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
     "pypinyin>=0.55.0",
     "sentence-transformers>=2.2.2",
     "numpy>=1.26.0",
+    "opencc>=1.1.9",
 ]
 
 [dependency-groups]
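
Not part of the commit, only an illustration: a minimal sketch of how the newly pinned opencc dependency can be exercised, assuming it has been installed into the project environment (e.g. after `uv sync`). The idiom is the one used by find_idiom_mock() in src/app.py below.

    # Sketch only — assumes the opencc>=1.1.9 dependency added above is installed.
    from opencc import OpenCC

    converter = OpenCC('s2t')            # simplified -> traditional, as in src/app.py
    print(converter.convert("对症下药"))  # expected output: 對症下藥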
src/app.py CHANGED
@@ -1,11 +1,10 @@
-import json
 import os
 
 import gradio as gr
 from cerebras.cloud.sdk import Cerebras
 from dotenv import load_dotenv
+from opencc import OpenCC
 
-from retrieval.retriever import retrieve_idiom
 from utils.utils import get_pinyin
 from verification.verifier import verify_idiom_exists
 
@@ -17,6 +16,8 @@ load_dotenv()
 MODEL = "gpt-oss-120b"
 USE_MOCK = False  # ✅ Toggle between mock and real API
 
+# simplified to traditional Chinese character converter
+char_converter = OpenCC('s2t')
 
 # ======================
 # Instantiate client (if not mocking)
@@ -31,9 +32,11 @@ if not USE_MOCK:
 # ======================
 def find_idiom_mock():
     idiom = "对症下药"
+    trad_idiom = char_converter.convert(idiom)
     explanation = """duì zhèng xià yào<br><br>
 To prescribe the right medicine; to take the right approach to a problem."""
-    return idiom, explanation
+    idiom_output = f"{idiom}<br>{trad_idiom}"
+    return idiom_output, explanation
 
 
 # ======================
@@ -46,7 +49,7 @@ EXAMPLE_CACHE = {}
 
 def find_idiom(situation: str, max_attempts: int = 3):
     """
-    Generate a verified Chinese idiom for a given situation.
+    Find a verified Chinese idiom for a given situation.
 
     Uses verify_idiom_exists() to confirm idiom validity.
     """
@@ -58,7 +61,7 @@ def find_idiom(situation: str, max_attempts: int = 3):
 1. A Chinese idiom (includes 成語、俗語、諺語),
 written in simplified Chinese characters,
 that conveys the idea of the given situation.
-2. Its literal English translation
+2. Its literal English translationx
 3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
 
 Format:
@@ -78,6 +81,7 @@ Answer:"""
         lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
 
         llm_idiom = lines[0] if lines else generated_text
+        trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
 
         # 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
         if verify_idiom_exists(llm_idiom):
@@ -86,14 +90,27 @@ Answer:"""
             if len(lines) >= 3:
                 translation = lines[1]
                 meaning = " ".join(lines[2:])
-                explanation = f"{pinyin_text}<br><br>{translation}<br><br>{meaning}"
             else:
-                explanation = f"{pinyin_text}<br><br>{' '.join(lines[1:])}"
-
+                translation = ""
+                meaning = " ".join(lines[1:])
+
+            explanation = f"""
+            <div style="line-height: 1.6;">
+                <p style="margin: 0;">
+                    {pinyin_text}
+                </p>
+                <hr style="border: none; border-top: 1px solid #ddd; margin: 8px 0;">
+                <p style="margin: 0;">
+                    <i>{translation}</i><br>
+                    {meaning}
+                </p>
+            </div>
+            """
             EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
-            return llm_idiom, explanation
+            idiom_output = f"{llm_idiom}<br>{trad_idiom}"
+            return idiom_output, explanation
         else:
-            print(f"Attempt {attempt}: '{llm_idiom}' failed verification, retrying...")
+            print(f"Attempt {attempt}: '{idiom_output}' failed verification, retrying...")
 
     # Fallback if no verified idiom found
     fallback_idiom = "未找到成语"
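
A reading note, not part of the commit: find_idiom() guards the conversion with `trad_idiom = char_converter.convert(llm_idiom) if char_converter else None`, yet char_converter is constructed unconditionally at import time, so the fallback never triggers. Below is a hedged sketch of module-level setup that would make the guard meaningful, assuming opencc is treated as an optional dependency (names mirror src/app.py; this is not what the commit does).

    # Sketch only (assumption): guard the OpenCC import so that the
    # `if char_converter else None` check in find_idiom() has an effect.
    try:
        from opencc import OpenCC
        char_converter = OpenCC('s2t')   # simplified -> traditional converter
    except ImportError:                  # opencc not installed
        char_converter = None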
uv.lock CHANGED
@@ -325,6 +325,7 @@ dependencies = [
     { name = "gradio" },
     { name = "numpy" },
     { name = "ollama" },
+    { name = "opencc" },
     { name = "pycccedict" },
     { name = "pypinyin" },
     { name = "sentence-transformers" },
@@ -347,6 +348,7 @@ requires-dist = [
     { name = "gradio", specifier = ">=4.44.1" },
     { name = "numpy", specifier = ">=1.26.0" },
     { name = "ollama", specifier = ">=0.5.3" },
+    { name = "opencc", specifier = ">=1.1.9" },
     { name = "pycccedict", specifier = ">=1.2.0" },
     { name = "pypinyin", specifier = ">=0.55.0" },
     { name = "sentence-transformers", specifier = ">=2.2.2" },
@@ -1093,6 +1095,17 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/be/f6/2091e50b8b6c3e6901f6eab283d5efd66fb71c86ddb1b4d68766c3eeba0f/ollama-0.5.3-py3-none-any.whl", hash = "sha256:a8303b413d99a9043dbf77ebf11ced672396b59bec27e6d5db67c88f01b279d2", size = 13490, upload-time = "2025-08-07T21:44:09.353Z" },
 ]
 
+[[package]]
+name = "opencc"
+version = "1.1.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e6/a2/0e86df1284143c389a3a6e33b159394da34b500a62b2b9c918949a2e6438/opencc-1.1.9.tar.gz", hash = "sha256:8ad72283732951303390fae33a1ceda98ac9b03368a8f2912edc934d74077e4a", size = 3409025, upload-time = "2024-08-08T04:55:32.483Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8c/58/d1f270e9d329d4f4f7c1963f9700aa9c9d6f0c5042c641005da6369b4c8e/OpenCC-1.1.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:48bc3e37942b91a9cf51f525631792f79378e5332bdba9e10c05f6e7fe9036ca", size = 1482516, upload-time = "2024-08-08T05:03:36.889Z" },
+    { url = "https://files.pythonhosted.org/packages/23/f0/ddd3522a142ebb66b30c7d30509de940979c2fb30a9edbf417fdfc37278d/OpenCC-1.1.9-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:1c5d1489bdaf9dc2865f0ea30eb565093253e73c1868d9c19554c8a044b545d4", size = 1653167, upload-time = "2024-08-08T04:55:26.395Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/67/fb4fb43c1502fd9f14646211d9643ef67e8123455e176af6668265d2f875/OpenCC-1.1.9-cp312-cp312-win_amd64.whl", hash = "sha256:64f8d22c8505b65e8ee2d6e73241cbc92785d38b3c93885b423d7c4fcd31c679", size = 1756337, upload-time = "2024-08-08T04:58:34.798Z" },
+]
+
 [[package]]
 name = "orjson"
 version = "3.11.3"