Spaces:
Sleeping
Sleeping
Merge branch 'main' into add-traditional
Browse files
- pyproject.toml +1 -0
- requirements.txt +1 -0
- src/app.py +46 -40
- uv.lock +11 -0
pyproject.toml
CHANGED
|
@@ -13,6 +13,7 @@ dependencies = [
|
|
| 13 |
"pypinyin>=0.55.0",
|
| 14 |
"sentence-transformers>=2.2.2",
|
| 15 |
"numpy>=1.26.0",
|
|
|
|
| 16 |
"opencc>=1.1.9",
|
| 17 |
]
|
| 18 |
|
|
|
|
| 13 |
"pypinyin>=0.55.0",
|
| 14 |
"sentence-transformers>=2.2.2",
|
| 15 |
"numpy>=1.26.0",
|
| 16 |
+
"backoff>=2.2.1",
|
| 17 |
"opencc>=1.1.9",
|
| 18 |
]
|
| 19 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
cerebras-cloud-sdk>=1.50.1
|
|
|
|
| 2 |
datasets>=4.1.0
|
| 3 |
gradio>=4.44.1
|
| 4 |
ollama>=0.5.3
|
|
|
|
| 1 |
cerebras-cloud-sdk>=1.50.1
|
| 2 |
+
backoff
|
| 3 |
datasets>=4.1.0
|
| 4 |
gradio>=4.44.1
|
| 5 |
ollama>=0.5.3
|
src/app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
-
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from opencc import OpenCC
|
| 7 |
|
|
@@ -15,7 +16,7 @@ from verification.verifier import verify_idiom_exists
|
|
| 15 |
load_dotenv()
|
| 16 |
|
| 17 |
MODEL = "gpt-oss-120b"
|
| 18 |
-
USE_MOCK =
|
| 19 |
|
| 20 |
# simplified to traditional Chinese character converter
|
| 21 |
char_converter = OpenCC('s2t')
|
|
@@ -64,6 +65,7 @@ def find_idiom_mock():
|
|
| 64 |
EXAMPLE_CACHE = {}
|
| 65 |
|
| 66 |
|
|
|
|
| 67 |
def find_idiom(situation: str, max_attempts: int = 3):
|
| 68 |
"""
|
| 69 |
Find a verified Chinese idiom for a given situation.
|
|
@@ -73,50 +75,48 @@ def find_idiom(situation: str, max_attempts: int = 3):
|
|
| 73 |
if situation in EXAMPLE_CACHE:
|
| 74 |
return EXAMPLE_CACHE[situation]
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
Answer:"""
|
| 91 |
-
|
| 92 |
-
response = CLIENT.chat.completions.create(
|
| 93 |
-
model=MODEL,
|
| 94 |
-
messages=[{"role": "user", "content": prompt}],
|
| 95 |
-
)
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
pinyin_text = get_pinyin(llm_idiom)
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
else:
|
| 111 |
-
translation = ""
|
| 112 |
-
meaning = " ".join(lines[1:])
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
return idiom_output, explanation
|
| 118 |
else:
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Fallback if no verified idiom found
|
| 122 |
fallback_idiom = "未找到成语"
|
|
@@ -130,6 +130,12 @@ def update_ui(situation, char_mode: bool):
|
|
| 130 |
if USE_MOCK:
|
| 131 |
idiom, explanation = find_idiom_mock()
|
| 132 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
idiom, explanation = find_idiom(situation)
|
| 134 |
|
| 135 |
idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
+
import backoff
|
| 5 |
+
from cerebras.cloud.sdk import APIConnectionError, APIStatusError, Cerebras, RateLimitError
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
from opencc import OpenCC
|
| 8 |
|
|
|
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
MODEL = "gpt-oss-120b"
|
| 19 |
+
USE_MOCK = False # ✅ Toggle between mock and real API
|
| 20 |
|
| 21 |
# simplified to traditional Chinese character converter
|
| 22 |
char_converter = OpenCC('s2t')
|
|
|
|
| 65 |
EXAMPLE_CACHE = {}
|
| 66 |
|
| 67 |
|
| 68 |
+
@backoff.on_exception(backoff.expo, RateLimitError)
|
| 69 |
def find_idiom(situation: str, max_attempts: int = 3):
|
| 70 |
"""
|
| 71 |
Find a verified Chinese idiom for a given situation.
|
|
|
|
| 75 |
if situation in EXAMPLE_CACHE:
|
| 76 |
return EXAMPLE_CACHE[situation]
|
| 77 |
|
| 78 |
+
prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
|
| 79 |
+
1. A Chinese idiom (includes 成語、俗語、諺語),
|
| 80 |
+
written in simplified Chinese characters,
|
| 81 |
+
that conveys the idea of the given situation.
|
| 82 |
+
2. Its literal English translation
|
| 83 |
+
3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
|
| 84 |
+
|
| 85 |
+
Format:
|
| 86 |
+
Idiom
|
| 87 |
+
Literal translation
|
| 88 |
+
Explanation
|
| 89 |
+
|
| 90 |
+
Situation: {situation}
|
| 91 |
+
Answer:"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
response = CLIENT.chat.completions.create(
|
| 94 |
+
model=MODEL,
|
| 95 |
+
messages=[{"role": "user", "content": prompt}],
|
| 96 |
+
)
|
| 97 |
|
| 98 |
+
generated_text = response.choices[0].message.content.strip()
|
| 99 |
+
lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
|
| 100 |
|
| 101 |
+
llm_idiom = lines[0] if lines else generated_text
|
| 102 |
+
trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
|
|
|
|
| 103 |
|
| 104 |
+
# 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
|
| 105 |
+
if verify_idiom_exists(llm_idiom):
|
| 106 |
+
pinyin_text = get_pinyin(llm_idiom)
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
if len(lines) >= 3:
|
| 109 |
+
translation = lines[1]
|
| 110 |
+
meaning = " ".join(lines[2:])
|
|
|
|
| 111 |
else:
|
| 112 |
+
translation = ""
|
| 113 |
+
meaning = " ".join(lines[1:])
|
| 114 |
+
|
| 115 |
+
explanation = format_explanation(pinyin_text, translation, meaning)
|
| 116 |
+
EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
|
| 117 |
+
idiom_output = f"{llm_idiom}<br>{trad_idiom}"
|
| 118 |
+
return idiom_output, explanation
|
| 119 |
+
|
| 120 |
|
| 121 |
# Fallback if no verified idiom found
|
| 122 |
fallback_idiom = "未找到成语"
|
|
|
|
| 130 |
if USE_MOCK:
|
| 131 |
idiom, explanation = find_idiom_mock()
|
| 132 |
else:
|
| 133 |
+
try:
|
| 134 |
+
idiom, explanation = find_idiom(situation)
|
| 135 |
+
except RateLimitError:
|
| 136 |
+
idiom = ""
|
| 137 |
+
explanation = "<div class='error-message'>Too many requests. Please try again later.</div>"
|
| 138 |
+
|
| 139 |
idiom, explanation = find_idiom(situation)
|
| 140 |
|
| 141 |
idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom
|
uv.lock
CHANGED
|
@@ -185,6 +185,15 @@ wheels = [
|
|
| 185 |
{ url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
|
| 186 |
]
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
[[package]]
|
| 189 |
name = "black"
|
| 190 |
version = "25.1.0"
|
|
@@ -320,6 +329,7 @@ name = "chinese-idioms"
|
|
| 320 |
version = "0.1.0"
|
| 321 |
source = { virtual = "." }
|
| 322 |
dependencies = [
|
|
|
|
| 323 |
{ name = "cerebras-cloud-sdk" },
|
| 324 |
{ name = "datasets" },
|
| 325 |
{ name = "gradio" },
|
|
@@ -343,6 +353,7 @@ dev = [
|
|
| 343 |
|
| 344 |
[package.metadata]
|
| 345 |
requires-dist = [
|
|
|
|
| 346 |
{ name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
|
| 347 |
{ name = "datasets", specifier = ">=4.1.0" },
|
| 348 |
{ name = "gradio", specifier = ">=4.44.1" },
|
|
|
|
| 185 |
{ url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
|
| 186 |
]
|
| 187 |
|
| 188 |
+
[[package]]
|
| 189 |
+
name = "backoff"
|
| 190 |
+
version = "2.2.1"
|
| 191 |
+
source = { registry = "https://pypi.org/simple" }
|
| 192 |
+
sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" }
|
| 193 |
+
wheels = [
|
| 194 |
+
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
|
| 195 |
+
]
|
| 196 |
+
|
| 197 |
[[package]]
|
| 198 |
name = "black"
|
| 199 |
version = "25.1.0"
|
|
|
|
| 329 |
version = "0.1.0"
|
| 330 |
source = { virtual = "." }
|
| 331 |
dependencies = [
|
| 332 |
+
{ name = "backoff" },
|
| 333 |
{ name = "cerebras-cloud-sdk" },
|
| 334 |
{ name = "datasets" },
|
| 335 |
{ name = "gradio" },
|
|
|
|
| 353 |
|
| 354 |
[package.metadata]
|
| 355 |
requires-dist = [
|
| 356 |
+
{ name = "backoff", specifier = ">=2.2.1" },
|
| 357 |
{ name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
|
| 358 |
{ name = "datasets", specifier = ">=4.1.0" },
|
| 359 |
{ name = "gradio", specifier = ">=4.44.1" },
|