Spaces:

ethzanalytics
/

gpt2-xl-conversational

Runtime error

Peter committed on Oct 20, 2022

Commit

8d9ed7d

1 Parent(s): 7afd604

🔊 ⚡️ add logs, improve params

Signed-off-by: Peter <[email protected]>

Files changed (1) hide show

grammar_improve.py CHANGED Viewed

@@ -3,6 +3,8 @@ grammar_improve.py - this .py script contains functions to improve the grammar o
 """
 import math
 import pprint as pp
 import re
@@ -453,8 +455,8 @@ def correct_grammar(
     n_results: int = 1,
     beams: int = 8,
     temp=1,
-    uniq_ngrams=2,
-    rep_penalty=1.5,
     device="cpu",
 ):
     """
@@ -480,7 +482,7 @@ def correct_grammar(
     st = time.perf_counter()
     if len(tokenizer(input_text).input_ids) < 4:
-        print(f"input text of {input_text} is too short to be corrected")
         return input_text
     max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
     batch = tokenizer(
@@ -494,7 +496,7 @@ def correct_grammar(
         **batch,
         max_length=max_length,
         min_length=min(10, len(input_text)),
-        no_repeat_ngram_size=uniq_ngrams,
         repetition_penalty=rep_penalty,
         num_beams=beams,
         num_return_sequences=n_results,

 """
+import logging
+logging.basicConfig(level=logging.INFO)
 import math
 import pprint as pp
 import re
     n_results: int = 1,
     beams: int = 8,
     temp=1,
+    no_repeat_ngram_size=4,
+    rep_penalty=2.5,
     device="cpu",
 ):
     """
     st = time.perf_counter()
     if len(tokenizer(input_text).input_ids) < 4:
+        logging.info(f"input text of {input_text} is too short to be corrected")
         return input_text
     max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
     batch = tokenizer(
         **batch,
         max_length=max_length,
         min_length=min(10, len(input_text)),
+        no_repeat_ngram_size=no_repeat_ngram_size,
         repetition_penalty=rep_penalty,
         num_beams=beams,
         num_return_sequences=n_results,