Spaces:
Runtime error
Runtime error
Peter
committed on
Commit
·
8d9ed7d
1
Parent(s):
7afd604
🔊 ⚡️ add logs, improve params
Browse files
Signed-off-by: Peter <[email protected]>
- grammar_improve.py +6 -4
grammar_improve.py
CHANGED
|
@@ -3,6 +3,8 @@ grammar_improve.py - this .py script contains functions to improve the grammar o
|
|
| 3 |
|
| 4 |
"""
|
| 5 |
|
|
|
|
|
|
|
| 6 |
import math
|
| 7 |
import pprint as pp
|
| 8 |
import re
|
|
@@ -453,8 +455,8 @@ def correct_grammar(
|
|
| 453 |
n_results: int = 1,
|
| 454 |
beams: int = 8,
|
| 455 |
temp=1,
|
| 456 |
-
|
| 457 |
-
rep_penalty=
|
| 458 |
device="cpu",
|
| 459 |
):
|
| 460 |
"""
|
|
@@ -480,7 +482,7 @@ def correct_grammar(
|
|
| 480 |
st = time.perf_counter()
|
| 481 |
|
| 482 |
if len(tokenizer(input_text).input_ids) < 4:
|
| 483 |
-
|
| 484 |
return input_text
|
| 485 |
max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
|
| 486 |
batch = tokenizer(
|
|
@@ -494,7 +496,7 @@ def correct_grammar(
|
|
| 494 |
**batch,
|
| 495 |
max_length=max_length,
|
| 496 |
min_length=min(10, len(input_text)),
|
| 497 |
-
no_repeat_ngram_size=
|
| 498 |
repetition_penalty=rep_penalty,
|
| 499 |
num_beams=beams,
|
| 500 |
num_return_sequences=n_results,
|
|
|
|
| 3 |
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
import logging
|
| 7 |
+
logging.basicConfig(level=logging.INFO)
|
| 8 |
import math
|
| 9 |
import pprint as pp
|
| 10 |
import re
|
|
|
|
| 455 |
n_results: int = 1,
|
| 456 |
beams: int = 8,
|
| 457 |
temp=1,
|
| 458 |
+
no_repeat_ngram_size=4,
|
| 459 |
+
rep_penalty=2.5,
|
| 460 |
device="cpu",
|
| 461 |
):
|
| 462 |
"""
|
|
|
|
| 482 |
st = time.perf_counter()
|
| 483 |
|
| 484 |
if len(tokenizer(input_text).input_ids) < 4:
|
| 485 |
+
logging.info(f"input text of {input_text} is too short to be corrected")
|
| 486 |
return input_text
|
| 487 |
max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
|
| 488 |
batch = tokenizer(
|
|
|
|
| 496 |
**batch,
|
| 497 |
max_length=max_length,
|
| 498 |
min_length=min(10, len(input_text)),
|
| 499 |
+
no_repeat_ngram_size=no_repeat_ngram_size,
|
| 500 |
repetition_penalty=rep_penalty,
|
| 501 |
num_beams=beams,
|
| 502 |
num_return_sequences=n_results,
|