Mel Seto committed on
Commit
a6cd490
·
2 Parent(s): 7ce643e 996c31c

Merge branch 'main' into add-traditional

Browse files
Files changed (4) hide show
  1. pyproject.toml +1 -0
  2. requirements.txt +1 -0
  3. src/app.py +46 -40
  4. uv.lock +11 -0
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
13
  "pypinyin>=0.55.0",
14
  "sentence-transformers>=2.2.2",
15
  "numpy>=1.26.0",
 
16
  "opencc>=1.1.9",
17
  ]
18
 
 
13
  "pypinyin>=0.55.0",
14
  "sentence-transformers>=2.2.2",
15
  "numpy>=1.26.0",
16
+ "backoff>=2.2.1",
17
  "opencc>=1.1.9",
18
  ]
19
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  cerebras-cloud-sdk>=1.50.1
 
2
  datasets>=4.1.0
3
  gradio>=4.44.1
4
  ollama>=0.5.3
 
1
  cerebras-cloud-sdk>=1.50.1
2
+ backoff
3
  datasets>=4.1.0
4
  gradio>=4.44.1
5
  ollama>=0.5.3
src/app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
 
3
  import gradio as gr
4
- from cerebras.cloud.sdk import Cerebras
 
5
  from dotenv import load_dotenv
6
  from opencc import OpenCC
7
 
@@ -15,7 +16,7 @@ from verification.verifier import verify_idiom_exists
15
  load_dotenv()
16
 
17
  MODEL = "gpt-oss-120b"
18
- USE_MOCK = True # ✅ Toggle between mock and real API
19
 
20
  # simplified to traditional Chinese character converter
21
  char_converter = OpenCC('s2t')
@@ -64,6 +65,7 @@ def find_idiom_mock():
64
  EXAMPLE_CACHE = {}
65
 
66
 
 
67
  def find_idiom(situation: str, max_attempts: int = 3):
68
  """
69
  Find a verified Chinese idiom for a given situation.
@@ -73,50 +75,48 @@ def find_idiom(situation: str, max_attempts: int = 3):
73
  if situation in EXAMPLE_CACHE:
74
  return EXAMPLE_CACHE[situation]
75
 
76
- for attempt in range(1, max_attempts + 1):
77
- prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
78
- 1. A Chinese idiom (includes 成語、俗語、諺語),
79
- written in simplified Chinese characters,
80
- that conveys the idea of the given situation.
81
- 2. Its literal English translationx
82
- 3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
83
-
84
- Format:
85
- Idiom
86
- Literal translation
87
- Explanation
88
-
89
- Situation: {situation}
90
- Answer:"""
91
-
92
- response = CLIENT.chat.completions.create(
93
- model=MODEL,
94
- messages=[{"role": "user", "content": prompt}],
95
- )
96
 
97
- generated_text = response.choices[0].message.content.strip()
98
- lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
 
 
99
 
100
- llm_idiom = lines[0] if lines else generated_text
101
- trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
102
 
103
- # 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
104
- if verify_idiom_exists(llm_idiom):
105
- pinyin_text = get_pinyin(llm_idiom)
106
 
107
- if len(lines) >= 3:
108
- translation = lines[1]
109
- meaning = " ".join(lines[2:])
110
- else:
111
- translation = ""
112
- meaning = " ".join(lines[1:])
113
 
114
- explanation = format_explanation(pinyin_text, translation, meaning)
115
- EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
116
- idiom_output = f"{llm_idiom}<br>{trad_idiom}"
117
- return idiom_output, explanation
118
  else:
119
- print(f"Attempt {attempt}: '{idiom_output}' failed verification, retrying...")
 
 
 
 
 
 
 
120
 
121
  # Fallback if no verified idiom found
122
  fallback_idiom = "未找到成语"
@@ -130,6 +130,12 @@ def update_ui(situation, char_mode: bool):
130
  if USE_MOCK:
131
  idiom, explanation = find_idiom_mock()
132
  else:
 
 
 
 
 
 
133
  idiom, explanation = find_idiom(situation)
134
 
135
  idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom
 
1
  import os
2
 
3
  import gradio as gr
4
+ import backoff
5
+ from cerebras.cloud.sdk import APIConnectionError, APIStatusError, Cerebras, RateLimitError
6
  from dotenv import load_dotenv
7
  from opencc import OpenCC
8
 
 
16
  load_dotenv()
17
 
18
  MODEL = "gpt-oss-120b"
19
+ USE_MOCK = False # ✅ Toggle between mock and real API
20
 
21
  # simplified to traditional Chinese character converter
22
  char_converter = OpenCC('s2t')
 
65
  EXAMPLE_CACHE = {}
66
 
67
 
68
+ @backoff.on_exception(backoff.expo, RateLimitError)
69
  def find_idiom(situation: str, max_attempts: int = 3):
70
  """
71
  Find a verified Chinese idiom for a given situation.
 
75
  if situation in EXAMPLE_CACHE:
76
  return EXAMPLE_CACHE[situation]
77
 
78
+ prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
79
+ 1. A Chinese idiom (includes 成語、俗語、諺語),
80
+ written in simplified Chinese characters,
81
+ that conveys the idea of the given situation.
82
+ 2. Its literal English translation
83
+ 3. Explain idiom in English. Keep explanation to 2-3 concise sentences.
84
+
85
+ Format:
86
+ Idiom
87
+ Literal translation
88
+ Explanation
89
+
90
+ Situation: {situation}
91
+ Answer:"""
 
 
 
 
 
 
92
 
93
+ response = CLIENT.chat.completions.create(
94
+ model=MODEL,
95
+ messages=[{"role": "user", "content": prompt}],
96
+ )
97
 
98
+ generated_text = response.choices[0].message.content.strip()
99
+ lines = [line.strip() for line in generated_text.split("\n") if line.strip()]
100
 
101
+ llm_idiom = lines[0] if lines else generated_text
102
+ trad_idiom = char_converter.convert(llm_idiom) if char_converter else None
 
103
 
104
+ # 2️⃣ Verify idiom using CC-CEDICT + Wiktionary
105
+ if verify_idiom_exists(llm_idiom):
106
+ pinyin_text = get_pinyin(llm_idiom)
 
 
 
107
 
108
+ if len(lines) >= 3:
109
+ translation = lines[1]
110
+ meaning = " ".join(lines[2:])
 
111
  else:
112
+ translation = ""
113
+ meaning = " ".join(lines[1:])
114
+
115
+ explanation = format_explanation(pinyin_text, translation, meaning)
116
+ EXAMPLE_CACHE[situation] = (llm_idiom, explanation)
117
+ idiom_output = f"{llm_idiom}<br>{trad_idiom}"
118
+ return idiom_output, explanation
119
+
120
 
121
  # Fallback if no verified idiom found
122
  fallback_idiom = "未找到成语"
 
130
  if USE_MOCK:
131
  idiom, explanation = find_idiom_mock()
132
  else:
133
+ try:
134
+ idiom, explanation = find_idiom(situation)
135
+ except RateLimitError:
136
+ idiom = ""
137
+ explanation = "<div class='error-message'>Too many requests. Please try again later.</div>"
138
+
139
  idiom, explanation = find_idiom(situation)
140
 
141
  idiom_output = char_converter.convert(idiom.split("<br>")[0]) if char_mode else idiom
uv.lock CHANGED
@@ -185,6 +185,15 @@ wheels = [
185
  { url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
186
  ]
187
 
 
 
 
 
 
 
 
 
 
188
  [[package]]
189
  name = "black"
190
  version = "25.1.0"
@@ -320,6 +329,7 @@ name = "chinese-idioms"
320
  version = "0.1.0"
321
  source = { virtual = "." }
322
  dependencies = [
 
323
  { name = "cerebras-cloud-sdk" },
324
  { name = "datasets" },
325
  { name = "gradio" },
@@ -343,6 +353,7 @@ dev = [
343
 
344
  [package.metadata]
345
  requires-dist = [
 
346
  { name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
347
  { name = "datasets", specifier = ">=4.1.0" },
348
  { name = "gradio", specifier = ">=4.44.1" },
 
185
  { url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" },
186
  ]
187
 
188
+ [[package]]
189
+ name = "backoff"
190
+ version = "2.2.1"
191
+ source = { registry = "https://pypi.org/simple" }
192
+ sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" }
193
+ wheels = [
194
+ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
195
+ ]
196
+
197
  [[package]]
198
  name = "black"
199
  version = "25.1.0"
 
329
  version = "0.1.0"
330
  source = { virtual = "." }
331
  dependencies = [
332
+ { name = "backoff" },
333
  { name = "cerebras-cloud-sdk" },
334
  { name = "datasets" },
335
  { name = "gradio" },
 
353
 
354
  [package.metadata]
355
  requires-dist = [
356
+ { name = "backoff", specifier = ">=2.2.1" },
357
  { name = "cerebras-cloud-sdk", specifier = ">=1.50.1" },
358
  { name = "datasets", specifier = ">=4.1.0" },
359
  { name = "gradio", specifier = ">=4.44.1" },