Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
π§ streamline vars
Browse files
Signed-off-by: peter szemraj <[email protected]>
app.py
CHANGED
|
@@ -241,21 +241,24 @@ def proc_submission(
|
|
| 241 |
|
| 242 |
st = time.perf_counter()
|
| 243 |
history = {}
|
| 244 |
-
|
| 245 |
-
|
| 246 |
logging.info(
|
| 247 |
-
f"pre-truncation word count: {len(contraction_aware_tokenize(
|
|
|
|
|
|
|
|
|
|
| 248 |
)
|
| 249 |
-
truncation_validated = truncate_word_count(clean_text, max_words=max_input_length)
|
| 250 |
|
| 251 |
if truncation_validated["was_truncated"]:
|
| 252 |
model_input_text = truncation_validated["processed_text"]
|
| 253 |
# create elaborate HTML warning
|
| 254 |
-
input_wc =
|
| 255 |
msg = f"""
|
| 256 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
| 257 |
<h3>Warning</h3>
|
| 258 |
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
|
|
|
|
| 259 |
</div>
|
| 260 |
"""
|
| 261 |
logging.warning(msg)
|
|
|
|
| 241 |
|
| 242 |
st = time.perf_counter()
|
| 243 |
history = {}
|
| 244 |
+
cln_text = clean(input_text, lower=False)
|
| 245 |
+
parsed_cln_text = remove_stopwords(cln_text) if predrop_stopwords else cln_text
|
| 246 |
logging.info(
|
| 247 |
+
f"pre-truncation word count: {len(contraction_aware_tokenize(parsed_cln_text))}"
|
| 248 |
+
)
|
| 249 |
+
truncation_validated = truncate_word_count(
|
| 250 |
+
parsed_cln_text, max_words=max_input_length
|
| 251 |
)
|
|
|
|
| 252 |
|
| 253 |
if truncation_validated["was_truncated"]:
|
| 254 |
model_input_text = truncation_validated["processed_text"]
|
| 255 |
# create elaborate HTML warning
|
| 256 |
+
input_wc = len(contraction_aware_tokenize(parsed_cln_text))
|
| 257 |
msg = f"""
|
| 258 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
| 259 |
<h3>Warning</h3>
|
| 260 |
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
|
| 261 |
+
<p>Dropping stopwords is set to {predrop_stopwords}. If this is not what you intended, please validate the advanced settings.</p>
|
| 262 |
</div>
|
| 263 |
"""
|
| 264 |
logging.warning(msg)
|