idiom-finder

Sleeping

App Files Community

Mel Seto committed 8 days ago

Commit

a6cd490

2 Parent(s): 7ce643e 996c31c

Merge branch 'main' into add-traditional

Browse files

Files changed (4) hide show

pyproject.toml +1 -0
requirements.txt +1 -0
src/app.py +46 -40
uv.lock +11 -0

pyproject.toml CHANGED Viewed

@@ -13,6 +13,7 @@ dependencies = [
     "pypinyin>=0.55.0",
     "sentence-transformers>=2.2.2",
     "numpy>=1.26.0",
     "opencc>=1.1.9",
 ]

     "pypinyin>=0.55.0",
     "sentence-transformers>=2.2.2",
     "numpy>=1.26.0",
+    "backoff>=2.2.1",
     "opencc>=1.1.9",
 ]

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 cerebras-cloud-sdk>=1.50.1
 datasets>=4.1.0
 gradio>=4.44.1
 ollama>=0.5.3

 cerebras-cloud-sdk>=1.50.1
+backoff
 datasets>=4.1.0
 gradio>=4.44.1
 ollama>=0.5.3

src/app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
 import gradio as gr
-from cerebras.cloud.sdk import Cerebras
 from dotenv import load_dotenv
 from opencc import OpenCC
@@ -15,7 +16,7 @@ from verification.verifier import verify_idiom_exists
 load_dotenv()
 MODEL = "gpt-oss-120b"
-USE_MOCK = True  # ✅ Toggle between mock and real API
 # simplified to traditional Chinese character converter
 char_converter = OpenCC('s2t')
@@ -64,6 +65,7 @@ def find_idiom_mock():
 EXAMPLE_CACHE = {}
 def find_idiom(situation: str, max_attempts: int = 3):
     """
     Find a verified Chinese idiom for a given situation.
@@ -73,50 +75,48 @@ def find_idiom(situation: str, max_attempts: int = 3):
     if situation in EXAMPLE_CACHE:
         return EXAMPLE_CACHE[situation]
-    for attempt in range(1, max_attempts + 1):
-        prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
-1. A Chinese idiom (includes 成語、俗語、諺語),
-   written in simplified Chinese characters,
-   that conveys the idea of the given situation.
-2. Its literal English translationx
-3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
-Format:
-Idiom
-Literal translation
-Explanation
-Situation: {situation}
-Answer:"""
-        response = CLIENT.chat.completions.create(
-            model=MODEL,
-            messages=[{"role": "user", "content": prompt}],
-        )
-        generated_text = response.choices[0].message.content.strip()
-        lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
-        llm_idiom = lines[0] if lines else generated_text
-        trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
-        # 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
-        if verify_idiom_exists(llm_idiom):
-            pinyin_text = get_pinyin(llm_idiom)
-            if len(lines) >= 3:
-                translation = lines[1]
-                meaning = " ".join(lines[2:])
-            else:
-                translation = ""
-                meaning = " ".join(lines[1:])
-            explanation = format_explanation(pinyin_text, translation, meaning)
-            EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
-            idiom_output = f"{llm_idiom}<br>{trad_idiom}"
-            return idiom_output, explanation
         else:
-            print(f"Attempt {attempt}: '{idiom_output}' failed verification, retrying...")
     # Fallback if no verified idiom found
     fallback_idiom = "未找到成语"
@@ -130,6 +130,12 @@ def update_ui(situation, char_mode: bool):
     if USE_MOCK:
         idiom, explanation = find_idiom_mock()
     else:
         idiom, explanation = find_idiom(situation)
     idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom

 import os
 import gradio as gr
+import backoff
+from cerebras.cloud.sdk import APIConnectionError, APIStatusError, Cerebras, RateLimitError
 from dotenv import load_dotenv
 from opencc import OpenCC
 load_dotenv()
 MODEL = "gpt-oss-120b"
+USE_MOCK = False  # ✅ Toggle between mock and real API
 # simplified to traditional Chinese character converter
 char_converter = OpenCC('s2t')
 EXAMPLE_CACHE = {}
+@backoff.on_exception(backoff.expo, RateLimitError)
 def find_idiom(situation: str, max_attempts: int = 3):
     """
     Find a verified Chinese idiom for a given situation.
     if situation in EXAMPLE_CACHE:
         return EXAMPLE_CACHE[situation]
+    prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
+        1. A Chinese idiom (includes 成語、俗語、諺語),
+        written in simplified Chinese characters,
+        that conveys the idea of the given situation.
+        2. Its literal English translation
+        3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
+        Format:
+        Idiom
+        Literal translation
+        Explanation
+        Situation: {situation}
+        Answer:"""
+    response = CLIENT.chat.completions.create(
+        model=MODEL,
+        messages=[{"role": "user", "content": prompt}],
+    )
+    generated_text = response.choices[0].message.content.strip()
+    lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
+    llm_idiom = lines[0] if lines else generated_text
+    trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
+    # 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
+    if verify_idiom_exists(llm_idiom):
+        pinyin_text = get_pinyin(llm_idiom)
+        if len(lines) >= 3:
+            translation = lines[1]
+            meaning = " ".join(lines[2:])
         else:
+            translation = ""
+            meaning = " ".join(lines[1:])
+        explanation = format_explanation(pinyin_text, translation, meaning)
+        EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
+        idiom_output = f"{llm_idiom}<br>{trad_idiom}"
+        return idiom_output, explanation
     # Fallback if no verified idiom found
     fallback_idiom = "未找到成语"
     if USE_MOCK:
         idiom, explanation = find_idiom_mock()
     else:
+        try:
+            idiom, explanation = find_idiom(situation)
+        except RateLimitError:
+            idiom = ""
+            explanation = "<div class='error-message'>Too many requests. Please try again later.</div>"
         idiom, explanation = find_idiom(situation)
     idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom

uv.lock CHANGED Viewed

@@ -185,6 +185,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
 ]
 [[package]]
 name = "black"
 version = "25.1.0"
@@ -320,6 +329,7 @@ name = "chinese-idioms"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "cerebras-cloud-sdk" },
     { name = "datasets" },
     { name = "gradio" },
@@ -343,6 +353,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
     { name = "datasets", specifier = ">=4.1.0" },
     { name = "gradio", specifier = ">=4.44.1" },

     { url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
 ]
+[[package]]
+name = "backoff"
+version = "2.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
+]
 [[package]]
 name = "black"
 version = "25.1.0"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "backoff" },
     { name = "cerebras-cloud-sdk" },
     { name = "datasets" },
     { name = "gradio" },
 [package.metadata]
 requires-dist = [
+    { name = "backoff", specifier = ">=2.2.1" },
     { name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
     { name = "datasets", specifier = ">=4.1.0" },
     { name = "gradio", specifier = ">=4.44.1" },