Spaces:
Running
Running
Fix the tests and converting model results to strings
Browse files
- app.py +3 -2
- src/baseline.py +13 -6
- tests/test_baseline.py +3 -3
- tests/test_integration.py +33 -18
app.py
CHANGED
|
@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.INFO)
|
|
| 9 |
|
| 10 |
@app.route('/', methods=['GET'])
|
| 11 |
def root():
|
| 12 |
-
return ("Welcome to the comma fixer.
|
|
|
|
| 13 |
"out the functionality.")
|
| 14 |
|
| 15 |
|
|
@@ -17,7 +18,7 @@ def root():
|
|
| 17 |
def fix_commas_with_baseline():
|
| 18 |
data = request.get_json()
|
| 19 |
if 's' in data:
|
| 20 |
-
return make_response(jsonify({
|
| 21 |
else:
|
| 22 |
return make_response("Parameter 's' missing", 400)
|
| 23 |
|
|
|
|
| 9 |
|
| 10 |
@app.route('/', methods=['GET'])
|
| 11 |
def root():
|
| 12 |
+
return ("Welcome to the comma fixer. Send a POST request to /fix-commas or /baseline/fix-commas with a string "
|
| 13 |
+
"'s' in the JSON body to try "
|
| 14 |
"out the functionality.")
|
| 15 |
|
| 16 |
|
|
|
|
| 18 |
def fix_commas_with_baseline():
|
| 19 |
data = request.get_json()
|
| 20 |
if 's' in data:
|
| 21 |
+
return make_response(jsonify({'s': fix_commas(app.baseline_pipeline, data['s'])}), 200)
|
| 22 |
else:
|
| 23 |
return make_response("Parameter 's' missing", 400)
|
| 24 |
|
src/baseline.py
CHANGED
|
@@ -14,16 +14,23 @@ def _remove_punctuation(s: str) -> str:
|
|
| 14 |
return s
|
| 15 |
|
| 16 |
|
| 17 |
-
def _convert_pipeline_json_to_string(pipeline_json: list[dict]) -> str:
|
| 18 |
# TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
|
| 19 |
# TODO don't accept tokens with commas inside words
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
|
| 27 |
return _convert_pipeline_json_to_string(
|
| 28 |
-
ner_pipeline(_remove_punctuation(s))
|
|
|
|
| 29 |
)
|
|
|
|
| 14 |
return s
|
| 15 |
|
| 16 |
|
| 17 |
+
def _convert_pipeline_json_to_string(pipeline_json: list[dict], original_s: str) -> str:
|
| 18 |
# TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
|
| 19 |
# TODO don't accept tokens with commas inside words
|
| 20 |
+
result = original_s.replace(',', '') # We will fix the commas, but keep everything else intact
|
| 21 |
+
current_offset = 0
|
| 22 |
+
for i in range(1, len(pipeline_json)):
|
| 23 |
+
current_word = pipeline_json[i - 1]['word'].replace('▁', '')
|
| 24 |
+
current_offset = result.find(current_word, current_offset) + len(current_word)
|
| 25 |
+
# Only insert commas for the final token of a word
|
| 26 |
+
if pipeline_json[i - 1]['entity'] == ',' and pipeline_json[i]['word'].startswith('▁'):
|
| 27 |
+
result = result[:current_offset] + ',' + result[current_offset:]
|
| 28 |
+
current_offset += 1
|
| 29 |
+
return result
|
| 30 |
|
| 31 |
|
| 32 |
def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
|
| 33 |
return _convert_pipeline_json_to_string(
|
| 34 |
+
ner_pipeline(_remove_punctuation(s)),
|
| 35 |
+
s
|
| 36 |
)
|
tests/test_baseline.py
CHANGED
|
@@ -21,9 +21,9 @@ def test_fix_commas_leaves_correct_strings_unchanged(baseline_pipeline, test_inp
|
|
| 21 |
@pytest.mark.parametrize(
|
| 22 |
"test_input, expected",
|
| 23 |
[
|
| 24 |
-
['I, am', 'I am.'],
|
| 25 |
-
['A complex clause however it misses a comma something else and a dot
|
| 26 |
-
'A complex
|
| 27 |
)
|
| 28 |
def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
|
| 29 |
result = fix_commas(baseline_pipeline, s=test_input)
|
|
|
|
| 21 |
@pytest.mark.parametrize(
|
| 22 |
"test_input, expected",
|
| 23 |
[
|
| 24 |
+
['I, am.', 'I am.'],
|
| 25 |
+
['A complex clause however it misses a comma something else and a dot...?',
|
| 26 |
+
'A complex clause, however, it misses a comma, something else and a dot...?']]
|
| 27 |
)
|
| 28 |
def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
|
| 29 |
result = fix_commas(baseline_pipeline, s=test_input)
|
tests/test_integration.py
CHANGED
|
@@ -1,34 +1,49 @@
|
|
| 1 |
-
import json
|
|
|
|
| 2 |
|
| 3 |
from app import app
|
| 4 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
-
def
|
| 8 |
-
response =
|
| 9 |
assert response.status_code == 400
|
| 10 |
|
| 11 |
|
| 12 |
@pytest.mark.parametrize(
|
| 13 |
"test_input",
|
| 14 |
-
[
|
| 15 |
-
|
| 16 |
-
|
| 17 |
)
|
| 18 |
-
def test_fix_commas_plain_string_unchanged(test_input: str):
|
| 19 |
-
response =
|
| 20 |
-
|
| 21 |
-
# result = json.loads(response.data.decode('utf-8')).get('s')
|
| 22 |
assert response.status_code == 200
|
| 23 |
-
|
| 24 |
|
| 25 |
|
| 26 |
@pytest.mark.parametrize(
|
| 27 |
"test_input, expected",
|
| 28 |
-
[['', ''],
|
| 29 |
-
['
|
| 30 |
-
|
| 31 |
-
'This test string should not have any commas inside it.']]
|
| 32 |
)
|
| 33 |
-
def test_fix_commas_fixes_wrong_commas(test_input: str, expected: str):
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import json
|
| 2 |
+
import pytest
|
| 3 |
|
| 4 |
from app import app
|
| 5 |
+
from baseline import create_baseline_pipeline
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@pytest.fixture()
|
| 9 |
+
def client():
|
| 10 |
+
app.config["DEBUG"] = True
|
| 11 |
+
app.config["TESTING"] = True
|
| 12 |
+
app.baseline_pipeline = create_baseline_pipeline()
|
| 13 |
+
yield app.test_client()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def test_fix_commas_fails_on_no_parameter(client):
|
| 17 |
+
response = client.post('/baseline/fix-commas/')
|
| 18 |
+
assert response.status_code == 400
|
| 19 |
|
| 20 |
|
| 21 |
+
def test_fix_commas_fails_on_wrong_parameters(client):
|
| 22 |
+
response = client.post('/baseline/fix-commas/', json={'text': "Some text."})
|
| 23 |
assert response.status_code == 400
|
| 24 |
|
| 25 |
|
| 26 |
@pytest.mark.parametrize(
|
| 27 |
"test_input",
|
| 28 |
+
['',
|
| 29 |
+
'Hello world.',
|
| 30 |
+
'This test string should not have any commas inside it.']
|
| 31 |
)
|
| 32 |
+
def test_fix_commas_plain_string_unchanged(client, test_input: str):
|
| 33 |
+
response = client.post('/baseline/fix-commas/', json={'s': test_input})
|
| 34 |
+
|
|
|
|
| 35 |
assert response.status_code == 200
|
| 36 |
+
assert response.get_json().get('s') == test_input
|
| 37 |
|
| 38 |
|
| 39 |
@pytest.mark.parametrize(
|
| 40 |
"test_input, expected",
|
| 41 |
+
[['I am, here.', 'I am here.'],
|
| 42 |
+
['books pens and pencils',
|
| 43 |
+
'books, pens and pencils.']]
|
|
|
|
| 44 |
)
|
| 45 |
+
def test_fix_commas_fixes_wrong_commas(client, test_input: str, expected: str):
|
| 46 |
+
response = client.post('/baseline/fix-commas/', json={'s': test_input})
|
| 47 |
+
|
| 48 |
+
assert response.status_code == 200
|
| 49 |
+
assert response.get_json().get('s') == expected
|