Mel Seto commited on
Commit
f797cdd
·
1 Parent(s): 15eacd7

organizing files into src folder etc.

Browse files
Files changed (7) hide show
  1. pyproject.toml +5 -0
  2. requirements.txt +267 -0
  3. src/app.py +148 -0
  4. src/run.py +14 -0
  5. src/style.css +48 -0
  6. src/utils/__init__.py +0 -0
  7. src/utils/utils.py +7 -0
pyproject.toml CHANGED
@@ -11,6 +11,11 @@ dependencies = [
11
  "ollama>=0.5.3",
12
  "pycccedict>=1.2.0",
13
  "pypinyin>=0.55.0",
 
 
 
 
 
14
  ]
15
 
16
  [dependency-groups]
 
11
  "ollama>=0.5.3",
12
  "pycccedict>=1.2.0",
13
  "pypinyin>=0.55.0",
14
+ "sentence-transformers>=2.2.2",
+ "numpy>=1.26.0",
19
  ]
20
 
21
  [dependency-groups]
requirements.txt CHANGED
@@ -1,5 +1,9 @@
1
  # This file was autogenerated by uv via the following command:
 
2
  # uv pip compile pyproject.toml -o requirements.txt
 
 
 
3
  aiofiles==24.1.0
4
  # via gradio
5
  aiohappyeyeballs==2.6.1
@@ -16,12 +20,27 @@ anyio==4.10.0
16
  # gradio
17
  # httpx
18
  # starlette
 
19
  attrs==25.3.0
20
  # via aiohttp
21
  brotli==1.1.0
22
  # via gradio
23
  cerebras-cloud-sdk==1.50.1
24
  # via chinese-idioms (pyproject.toml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  certifi==2025.8.3
26
  # via
27
  # httpcore
@@ -31,17 +50,38 @@ charset-normalizer==3.4.3
31
  # via requests
32
  click==8.2.1
33
  # via
 
34
  # typer
35
  # uvicorn
36
  datasets==4.1.0
37
  # via chinese-idioms (pyproject.toml)
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  dill==0.4.0
39
  # via
40
  # datasets
41
  # multiprocess
 
42
  distro==1.9.0
43
  # via cerebras-cloud-sdk
44
  fastapi==0.116.2
 
 
 
 
 
 
45
  # via gradio
46
  ffmpy==0.6.1
47
  # via gradio
@@ -49,18 +89,34 @@ filelock==3.19.1
49
  # via
50
  # datasets
51
  # huggingface-hub
 
 
 
 
 
52
  frozenlist==1.7.0
53
  # via
54
  # aiohttp
55
  # aiosignal
 
56
  fsspec==2025.9.0
 
 
 
57
  # via
58
  # datasets
59
  # gradio-client
60
  # huggingface-hub
 
61
  gradio==5.46.0
62
  # via chinese-idioms (pyproject.toml)
63
  gradio-client==1.13.0
 
 
 
 
 
 
64
  # via gradio
65
  groovy==0.1.2
66
  # via gradio
@@ -68,7 +124,11 @@ h11==0.16.0
68
  # via
69
  # httpcore
70
  # uvicorn
 
71
  hf-xet==1.1.10
 
 
 
72
  # via huggingface-hub
73
  httpcore==1.0.9
74
  # via httpx
@@ -79,33 +139,67 @@ httpx==0.28.1
79
  # gradio-client
80
  # ollama
81
  # safehttpx
 
82
  huggingface-hub==0.35.0
 
 
 
83
  # via
84
  # datasets
85
  # gradio
86
  # gradio-client
 
 
 
 
 
 
87
  idna==3.10
88
  # via
89
  # anyio
90
  # httpx
91
  # requests
92
  # yarl
 
93
  jinja2==3.1.6
94
  # via gradio
95
  markdown-it-py==4.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # via rich
97
  markupsafe==3.0.2
98
  # via
99
  # gradio
100
  # jinja2
 
101
  mdurl==0.1.2
102
  # via markdown-it-py
 
 
 
 
 
 
 
 
103
  multidict==6.6.4
104
  # via
105
  # aiohttp
106
  # yarl
107
  multiprocess==0.70.16
108
  # via datasets
 
109
  numpy >= 2.0, < 3.0
110
  # via
111
  # datasets
@@ -113,20 +207,98 @@ numpy >= 2.0, < 3.0
113
  # pandas
114
  ollama==0.5.4
115
  # via chinese-idioms (pyproject.toml)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  orjson==3.11.3
117
  # via gradio
118
  packaging==25.0
119
  # via
 
 
 
 
120
  # datasets
121
  # gradio
122
  # gradio-client
123
  # huggingface-hub
 
 
 
 
 
124
  pandas==2.3.2
125
  # via
126
  # datasets
127
  # gradio
 
128
  pillow==11.3.0
129
  # via gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  propcache==0.3.2
131
  # via
132
  # aiohttp
@@ -134,8 +306,13 @@ propcache==0.3.2
134
  pyarrow==21.0.0
135
  # via datasets
136
  pycccedict==1.2.0
 
137
  # via chinese-idioms (pyproject.toml)
138
  pydantic==2.11.9
 
 
 
 
139
  # via
140
  # cerebras-cloud-sdk
141
  # fastapi
@@ -146,11 +323,24 @@ pydantic-core==2.33.2
146
  pydub==0.25.1
147
  # via gradio
148
  pygments==2.19.2
 
149
  # via rich
150
  pypinyin==0.55.0
151
  # via chinese-idioms (pyproject.toml)
152
  python-dateutil==2.9.0.post0
153
  # via pandas
 
 
 
 
 
 
 
 
 
 
 
 
154
  python-multipart==0.0.20
155
  # via gradio
156
  pytz==2025.2
@@ -160,10 +350,17 @@ pyyaml==6.0.2
160
  # datasets
161
  # gradio
162
  # huggingface-hub
 
 
 
 
 
 
163
  requests==2.32.5
164
  # via
165
  # datasets
166
  # huggingface-hub
 
167
  rich==14.1.0
168
  # via typer
169
  ruff==0.13.0
@@ -173,6 +370,32 @@ safehttpx==0.1.6
173
  semantic-version==2.10.0
174
  # via gradio
175
  shellingham==1.5.4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  # via typer
177
  six==1.17.0
178
  # via python-dateutil
@@ -180,17 +403,46 @@ sniffio==1.3.1
180
  # via
181
  # anyio
182
  # cerebras-cloud-sdk
 
183
  starlette==0.48.0
184
  # via
185
  # fastapi
186
  # gradio
187
  tomlkit==0.13.3
188
  # via gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  tqdm==4.67.1
190
  # via
191
  # datasets
192
  # huggingface-hub
 
193
  typer==0.17.4
 
 
 
 
 
 
 
 
 
194
  # via gradio
195
  typing-extensions==4.15.0
196
  # via
@@ -203,7 +455,13 @@ typing-extensions==4.15.0
203
  # huggingface-hub
204
  # pydantic
205
  # pydantic-core
 
 
 
 
206
  # starlette
 
 
207
  # typer
208
  # typing-inspection
209
  typing-inspection==0.4.1
@@ -211,9 +469,18 @@ typing-inspection==0.4.1
211
  tzdata==2025.2
212
  # via pandas
213
  urllib3==2.5.0
 
214
  # via requests
215
  uvicorn==0.35.0
216
  # via gradio
 
 
 
 
 
 
 
 
217
  websockets==15.0.1
218
  # via gradio-client
219
  xxhash==3.5.0
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv export --no-hashes --format requirements-txt
7
  aiofiles==24.1.0
8
  # via gradio
9
  aiohappyeyeballs==2.6.1
 
20
  # gradio
21
  # httpx
22
  # starlette
23
+ <<<<<<< HEAD
24
  attrs==25.3.0
25
  # via aiohttp
26
  brotli==1.1.0
27
  # via gradio
28
  cerebras-cloud-sdk==1.50.1
29
  # via chinese-idioms (pyproject.toml)
30
+ =======
31
+ # watchfiles
32
+ astroid==3.3.11
33
+ # via pylint
34
+ attrs==25.3.0
35
+ # via aiohttp
36
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
37
+ # via gradio
38
+ black==25.1.0
39
+ brotli==1.1.0
40
+ # via gradio
41
+ cerebras-cloud-sdk==1.50.1
42
+ # via chinese-idioms
43
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
44
  certifi==2025.8.3
45
  # via
46
  # httpcore
 
50
  # via requests
51
  click==8.2.1
52
  # via
53
+ <<<<<<< HEAD
54
  # typer
55
  # uvicorn
56
  datasets==4.1.0
57
  # via chinese-idioms (pyproject.toml)
58
+ =======
59
+ # black
60
+ # typer
61
+ # uvicorn
62
+ colorama==0.4.6 ; sys_platform == 'win32'
63
+ # via
64
+ # click
65
+ # pylint
66
+ # pytest
67
+ # tqdm
68
+ datasets==4.1.0
69
+ # via chinese-idioms
70
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
71
  dill==0.4.0
72
  # via
73
  # datasets
74
  # multiprocess
75
+ <<<<<<< HEAD
76
  distro==1.9.0
77
  # via cerebras-cloud-sdk
78
  fastapi==0.116.2
79
+ =======
80
+ # pylint
81
+ distro==1.9.0
82
+ # via cerebras-cloud-sdk
83
+ fastapi==0.116.1
84
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
85
  # via gradio
86
  ffmpy==0.6.1
87
  # via gradio
 
89
  # via
90
  # datasets
91
  # huggingface-hub
92
+ <<<<<<< HEAD
93
+ =======
94
+ # torch
95
+ # transformers
96
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
97
  frozenlist==1.7.0
98
  # via
99
  # aiohttp
100
  # aiosignal
101
+ <<<<<<< HEAD
102
  fsspec==2025.9.0
103
+ =======
104
+ fsspec==2025.7.0
105
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
106
  # via
107
  # datasets
108
  # gradio-client
109
  # huggingface-hub
110
+ <<<<<<< HEAD
111
  gradio==5.46.0
112
  # via chinese-idioms (pyproject.toml)
113
  gradio-client==1.13.0
114
+ =======
115
+ # torch
116
+ gradio==5.44.0
117
+ # via chinese-idioms
118
+ gradio-client==1.12.1
119
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
120
  # via gradio
121
  groovy==0.1.2
122
  # via gradio
 
124
  # via
125
  # httpcore
126
  # uvicorn
127
+ <<<<<<< HEAD
128
  hf-xet==1.1.10
129
+ =======
130
+ hf-xet==1.1.8 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
131
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
132
  # via huggingface-hub
133
  httpcore==1.0.9
134
  # via httpx
 
139
  # gradio-client
140
  # ollama
141
  # safehttpx
142
+ <<<<<<< HEAD
143
  huggingface-hub==0.35.0
144
+ =======
145
+ huggingface-hub==0.34.4
146
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
147
  # via
148
  # datasets
149
  # gradio
150
  # gradio-client
151
+ <<<<<<< HEAD
152
+ =======
153
+ # sentence-transformers
154
+ # tokenizers
155
+ # transformers
156
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
157
  idna==3.10
158
  # via
159
  # anyio
160
  # httpx
161
  # requests
162
  # yarl
163
+ <<<<<<< HEAD
164
  jinja2==3.1.6
165
  # via gradio
166
  markdown-it-py==4.0.0
167
+ =======
168
+ iniconfig==2.1.0
169
+ # via pytest
170
+ isort==6.0.1
171
+ # via pylint
172
+ jinja2==3.1.6
173
+ # via
174
+ # gradio
175
+ # torch
176
+ joblib==1.5.2
177
+ # via scikit-learn
178
+ markdown-it-py==4.0.0 ; sys_platform != 'emscripten'
179
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
180
  # via rich
181
  markupsafe==3.0.2
182
  # via
183
  # gradio
184
  # jinja2
185
+ <<<<<<< HEAD
186
  mdurl==0.1.2
187
  # via markdown-it-py
188
+ =======
189
+ mccabe==0.7.0
190
+ # via pylint
191
+ mdurl==0.1.2 ; sys_platform != 'emscripten'
192
+ # via markdown-it-py
193
+ mpmath==1.3.0
194
+ # via sympy
195
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
196
  multidict==6.6.4
197
  # via
198
  # aiohttp
199
  # yarl
200
  multiprocess==0.70.16
201
  # via datasets
202
+ <<<<<<< HEAD
203
  numpy >= 2.0, < 3.0
204
  # via
205
  # datasets
 
207
  # pandas
208
  ollama==0.5.4
209
  # via chinese-idioms (pyproject.toml)
210
+ =======
211
+ mypy-extensions==1.1.0
212
+ # via black
213
+ networkx==3.5
214
+ # via torch
215
+ numpy==2.3.2
216
+ # via
217
+ # chinese-idioms
218
+ # datasets
219
+ # gradio
220
+ # pandas
221
+ # scikit-learn
222
+ # scipy
223
+ # transformers
224
+ nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
225
+ # via
226
+ # nvidia-cudnn-cu12
227
+ # nvidia-cusolver-cu12
228
+ # torch
229
+ nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
230
+ # via torch
231
+ nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
232
+ # via torch
233
+ nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
234
+ # via torch
235
+ nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
236
+ # via torch
237
+ nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
238
+ # via torch
239
+ nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
240
+ # via torch
241
+ nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
242
+ # via torch
243
+ nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
244
+ # via torch
245
+ nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
246
+ # via
247
+ # nvidia-cusolver-cu12
248
+ # torch
249
+ nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
250
+ # via torch
251
+ nvidia-nccl-cu12==2.27.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
252
+ # via torch
253
+ nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
254
+ # via
255
+ # nvidia-cufft-cu12
256
+ # nvidia-cusolver-cu12
257
+ # nvidia-cusparse-cu12
258
+ # torch
259
+ nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
260
+ # via torch
261
+ ollama==0.5.3
262
+ # via chinese-idioms
263
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
264
  orjson==3.11.3
265
  # via gradio
266
  packaging==25.0
267
  # via
268
+ <<<<<<< HEAD
269
+ =======
270
+ # black
271
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
272
  # datasets
273
  # gradio
274
  # gradio-client
275
  # huggingface-hub
276
+ <<<<<<< HEAD
277
+ =======
278
+ # pytest
279
+ # transformers
280
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
281
  pandas==2.3.2
282
  # via
283
  # datasets
284
  # gradio
285
+ <<<<<<< HEAD
286
  pillow==11.3.0
287
  # via gradio
288
+ =======
289
+ pathspec==0.12.1
290
+ # via black
291
+ pillow==11.3.0
292
+ # via
293
+ # gradio
294
+ # sentence-transformers
295
+ platformdirs==4.4.0
296
+ # via
297
+ # black
298
+ # pylint
299
+ pluggy==1.6.0
300
+ # via pytest
301
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
302
  propcache==0.3.2
303
  # via
304
  # aiohttp
 
306
  pyarrow==21.0.0
307
  # via datasets
308
  pycccedict==1.2.0
309
+ <<<<<<< HEAD
310
  # via chinese-idioms (pyproject.toml)
311
  pydantic==2.11.9
312
+ =======
313
+ # via chinese-idioms
314
+ pydantic==2.11.7
315
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
316
  # via
317
  # cerebras-cloud-sdk
318
  # fastapi
 
323
  pydub==0.25.1
324
  # via gradio
325
  pygments==2.19.2
326
+ <<<<<<< HEAD
327
  # via rich
328
  pypinyin==0.55.0
329
  # via chinese-idioms (pyproject.toml)
330
  python-dateutil==2.9.0.post0
331
  # via pandas
332
+ =======
333
+ # via
334
+ # pytest
335
+ # rich
336
+ pylint==3.3.8
337
+ pypinyin==0.55.0
338
+ # via chinese-idioms
339
+ pytest==8.4.2
340
+ python-dateutil==2.9.0.post0
341
+ # via pandas
342
+ python-dotenv==1.1.1
343
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
344
  python-multipart==0.0.20
345
  # via gradio
346
  pytz==2025.2
 
350
  # datasets
351
  # gradio
352
  # huggingface-hub
353
+ <<<<<<< HEAD
354
+ =======
355
+ # transformers
356
+ regex==2025.9.18
357
+ # via transformers
358
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
359
  requests==2.32.5
360
  # via
361
  # datasets
362
  # huggingface-hub
363
+ <<<<<<< HEAD
364
  rich==14.1.0
365
  # via typer
366
  ruff==0.13.0
 
370
  semantic-version==2.10.0
371
  # via gradio
372
  shellingham==1.5.4
373
+ =======
374
+ # transformers
375
+ rich==14.1.0 ; sys_platform != 'emscripten'
376
+ # via typer
377
+ ruff==0.12.10 ; sys_platform != 'emscripten'
378
+ # via gradio
379
+ safehttpx==0.1.6
380
+ # via gradio
381
+ safetensors==0.6.2
382
+ # via transformers
383
+ scikit-learn==1.7.2
384
+ # via sentence-transformers
385
+ scipy==1.16.2
386
+ # via
387
+ # scikit-learn
388
+ # sentence-transformers
389
+ semantic-version==2.10.0
390
+ # via gradio
391
+ sentence-transformers==5.1.1
392
+ # via chinese-idioms
393
+ setuptools==80.9.0
394
+ # via
395
+ # torch
396
+ # triton
397
+ shellingham==1.5.4 ; sys_platform != 'emscripten'
398
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
399
  # via typer
400
  six==1.17.0
401
  # via python-dateutil
 
403
  # via
404
  # anyio
405
  # cerebras-cloud-sdk
406
+ <<<<<<< HEAD
407
  starlette==0.48.0
408
  # via
409
  # fastapi
410
  # gradio
411
  tomlkit==0.13.3
412
  # via gradio
413
+ =======
414
+ starlette==0.47.3
415
+ # via
416
+ # fastapi
417
+ # gradio
418
+ sympy==1.14.0
419
+ # via torch
420
+ threadpoolctl==3.6.0
421
+ # via scikit-learn
422
+ tokenizers==0.22.1
423
+ # via transformers
424
+ tomlkit==0.13.3
425
+ # via
426
+ # gradio
427
+ # pylint
428
+ torch==2.8.0
429
+ # via sentence-transformers
430
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
431
  tqdm==4.67.1
432
  # via
433
  # datasets
434
  # huggingface-hub
435
+ <<<<<<< HEAD
436
  typer==0.17.4
437
+ =======
438
+ # sentence-transformers
439
+ # transformers
440
+ transformers==4.56.2
441
+ # via sentence-transformers
442
+ triton==3.4.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
443
+ # via torch
444
+ typer==0.16.1 ; sys_platform != 'emscripten'
445
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
446
  # via gradio
447
  typing-extensions==4.15.0
448
  # via
 
455
  # huggingface-hub
456
  # pydantic
457
  # pydantic-core
458
+ <<<<<<< HEAD
459
+ # starlette
460
+ =======
461
+ # sentence-transformers
462
  # starlette
463
+ # torch
464
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
465
  # typer
466
  # typing-inspection
467
  typing-inspection==0.4.1
 
469
  tzdata==2025.2
470
  # via pandas
471
  urllib3==2.5.0
472
+ <<<<<<< HEAD
473
  # via requests
474
  uvicorn==0.35.0
475
  # via gradio
476
+ =======
477
+ # via
478
+ # gradio
479
+ # requests
480
+ uvicorn==0.35.0 ; sys_platform != 'emscripten'
481
+ # via gradio
482
+ watchfiles==1.1.0
483
+ >>>>>>> 660f6fb (organizing files into src folder etc.)
484
  websockets==15.0.1
485
  # via gradio-client
486
  xxhash==3.5.0
src/app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ import gradio as gr
5
+ from cerebras.cloud.sdk import Cerebras
6
+ from dotenv import load_dotenv
7
+
8
+ from utils.utils import get_pinyin
9
+
10
+ # ======================
11
+ # Config
12
+ # ======================
13
+ load_dotenv()
14
+
15
+ MODEL = "gpt-oss-120b"
16
+ USE_MOCK = False # ✅ Toggle between mock and real API
17
+
18
+ # ======================
19
+ # Idiom dataset
20
+ # ======================
21
+ IDIOM_FILE_PATH = "idiom_dataset/chid_idiom_reference.json"
22
+ with open(IDIOM_FILE_PATH, "r", encoding="utf-8") as f:
23
+ idiom_list = json.load(f)
24
+ VALID_IDIOMS = set(idiom_list)
25
+
26
+ # ======================
27
+ # Instantiate client (if not mocking)
28
+ # ======================
29
+ CLIENT = None
30
+ if not USE_MOCK:
31
+ CLIENT = Cerebras(api_key=os.environ.get("CEREBRAS_API_KEY"))
32
+
33
+
# ======================
# Mock function for UI testing
# ======================
def generate_idiom_mock():
    """Offline stand-in for generate_idiom: return a fixed (idiom, explanation) pair."""
    explanation = """duì zhèng xià yào<br><br>
    To prescribe the right medicine; to take the right approach to a problem."""
    return "对症下药", explanation
42
+
43
+
# ======================
# Real API function
# ======================


def generate_idiom(situation: str):
    """Ask the LLM for a Chinese idiom matching *situation*.

    Returns a ``(idiom, explanation_html)`` tuple. The idiom is checked
    against VALID_IDIOMS; when the model produces an unknown idiom, a
    retry message is returned instead of pinyin/translation.
    """
    prompt = f"""You are a wise assistant. Given a situation, respond with exactly:
    1. A Chinese idiom (includes 成語、俗語、諺語),
    written in simplified Chinese characters,
    that conveys the idea of the given situation.
    2. Its literal English translation
    3. Explain idiom. Keep explanation to 2-3 concise sentences.

    Format:
    Idiom
    Literal translation
    Explanation

    Situation: {situation}
    Answer:"""

    response = CLIENT.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    print(response)

    raw_text = response.choices[0].message.content.strip()
    # Keep only non-empty lines, stripped of surrounding whitespace.
    parsed = [ln.strip() for ln in raw_text.split("\n") if ln.strip()]

    # First non-empty line is expected to be the idiom itself.
    llm_idiom = parsed[0] if parsed else raw_text

    if llm_idiom not in VALID_IDIOMS:
        # Hallucinated / unknown idiom: surface it with a retry hint.
        return llm_idiom, "The LLM generated an invalid idiom. Try again!"

    pinyin_text = get_pinyin(llm_idiom)

    if len(parsed) < 3:
        # Degenerate reply: show whatever followed the idiom after the pinyin.
        return llm_idiom, f"{pinyin_text}<br><br>{' '.join(parsed[1:])}"

    translation = parsed[1]
    meaning = " ".join(parsed[2:])
    return llm_idiom, f"{pinyin_text}<br><br>{translation}<br><br>{meaning}"
90
+
91
+
# ======================
# UI Wrapper
# ======================
def update_ui(situation):
    """Generate an idiom (mock or real) and wrap both outputs in styled HTML divs."""
    idiom, explanation = (
        generate_idiom_mock() if USE_MOCK else generate_idiom(situation)
    )
    idiom_html = f"<div class='idiom-output'>{idiom}</div>"
    explanation_html = f"<div class='explanation-output'>{explanation}</div>"
    return idiom_html, explanation_html
105
+
106
+
# ======================
# Launch app
# ======================
def launch_app():
    """Build the Gradio Blocks UI and start the (blocking) server."""
    example_situations = [
        ["When facing a big challenge"],
        ["When someone helps you in a time of need"],
        ["When you need to stay calm under pressure"],
        ["When teamwork is important to succeed"],
        ["When rushing leads to mistakes"],
    ]

    with gr.Blocks(css="style.css") as demo:
        gr.Markdown("# 🎋 Chinese Idiom Finder")

        with gr.Row():
            with gr.Column():
                situation = gr.Textbox(
                    label="Enter a situation",
                    lines=2,
                    placeholder="e.g., When facing a big challenge",
                )
                generate_btn = gr.Button("✨ Find Idiom")

                # ✅ Example situations
                gr.Examples(examples=example_situations, inputs=situation)

            with gr.Column():
                idiom_output = gr.HTML(label="Idiom")
                explanation_output = gr.HTML(label="Explanation")

        # pylint: disable=no-member
        generate_btn.click(
            fn=update_ui,
            inputs=situation,
            outputs=[idiom_output, explanation_output],
        )

    demo.launch()


if __name__ == "__main__":
    launch_app()
src/run.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from multiprocessing import freeze_support

from watchfiles import run_process

import app  # Import app module


def start_app():
    """Entry point re-invoked by watchfiles whenever a watched file changes."""
    app.launch_app()


if __name__ == "__main__":
    # Needed on Windows / frozen builds so multiprocessing (used internally
    # by run_process) can safely re-import this module in child processes.
    freeze_support()
    # Watch the current directory and restart the app on any file change.
    # NOTE(review): "." assumes the process is started from src/ — confirm.
    run_process(".", target=start_app)
src/style.css ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* ========================
Dark Mode Default
======================== */
body, .gradio-container {
    background-color: #1a1a2e;
    color: #f0f0f0;
    font-family: Arial, sans-serif;
}

/* Large centered headline for the generated idiom (see .idiom-output div in app.py) */
.idiom-output {
    font-size: 2rem;
    font-weight: bold;
    text-align: center;
    color: #ff6f61;
    margin-bottom: 0.5em;
}

/* Pinyin / translation / meaning text below the idiom */
.explanation-output {
    font-size: 1rem;
    line-height: 1.5;
    color: #dcdcdc;
    text-align: center;
}

/* Buttons */
.gradio-container .gr-button {
    background-color: #3a3a5e;
    color: #ffffff;
    border: 1px solid #5c5c8a;
    font-weight: bold;
}
.gradio-container .gr-button:hover {
    background-color: #505080;
}

/* Textboxes */
.gradio-container .gr-textbox textarea {
    background-color: #2a2a45;
    color: #ffffff;
    border: 1px solid #5c5c8a;
}

/* Examples */
.gradio-container .gr-examples {
    background-color: #2a2a45;
    border: 1px solid #5c5c8a;
    color: #ffffff;
}
src/utils/__init__.py ADDED
File without changes
src/utils/utils.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
from pypinyin import Style, pinyin


def get_pinyin(text: str):
    """Convert Chinese characters to pinyin with tones."""
    syllables = pinyin(text, style=Style.TONE, heteronym=False)
    return " ".join(entry[0] for entry in syllables)