Spaces:

klasocki
/

comma-fixer

Running

klasocki committed on Aug 18, 2023

Commit

35c0239

1 Parent(s): 5760b44

Fix the tests and converting model results to strings

Files changed (4) hide show

app.py CHANGED Viewed

@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.INFO)
 @app.route('/', methods=['GET'])
 def root():
-    return ("Welcome to the comma fixer. Go to /fix-commas?s='some text' or /baseline/fix-commas?s='some text' to try "
             "out the functionality.")
@@ -17,7 +18,7 @@ def root():
 def fix_commas_with_baseline():
     data = request.get_json()
     if 's' in data:
-        return make_response(jsonify({"s": fix_commas(app.baseline_pipeline, data['s'])}), 200)
     else:
         return make_response("Parameter 's' missing", 400)

 @app.route('/', methods=['GET'])
 def root():
+    return ("Welcome to the comma fixer. Send a POST request to /fix-commas or /baseline/fix-commas with a string "
+            "'s' in the JSON body to try "
             "out the functionality.")
 def fix_commas_with_baseline():
     data = request.get_json()
     if 's' in data:
+        return make_response(jsonify({'s': fix_commas(app.baseline_pipeline, data['s'])}), 200)
     else:
         return make_response("Parameter 's' missing", 400)

src/baseline.py CHANGED Viewed

@@ -14,16 +14,23 @@ def _remove_punctuation(s: str) -> str:
     return s
-def _convert_pipeline_json_to_string(pipeline_json: list[dict]) -> str:
     # TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
     # TODO don't accept tokens with commas inside words
-    return ''.join(
-        token['word'].replace('▁', ' ') + token['entity'].replace('0', '')
-        for token in pipeline_json
-    ).strip()
 def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
     return _convert_pipeline_json_to_string(
-        ner_pipeline(_remove_punctuation(s))
     )

     return s
+def _convert_pipeline_json_to_string(pipeline_json: list[dict], original_s: str) -> str:
     # TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
     # TODO don't accept tokens with commas inside words
+    result = original_s.replace(',', '') # We will fix the commas, but keep everything else intact
+    current_offset = 0
+    for i in range(1, len(pipeline_json)):
+        current_word = pipeline_json[i - 1]['word'].replace('▁', '')
+        current_offset = result.find(current_word, current_offset) + len(current_word)
+        # Only insert commas for the final token of a word
+        if pipeline_json[i - 1]['entity'] == ',' and pipeline_json[i]['word'].startswith('▁'):
+            result = result[:current_offset] + ',' + result[current_offset:]
+            current_offset += 1
+    return result
 def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
     return _convert_pipeline_json_to_string(
+        ner_pipeline(_remove_punctuation(s)),
+        s
     )

tests/test_baseline.py CHANGED Viewed

@@ -21,9 +21,9 @@ def test_fix_commas_leaves_correct_strings_unchanged(baseline_pipeline, test_inp
 @pytest.mark.parametrize(
     "test_input, expected",
     [
-        ['I, am', 'I am.'],
-        ['A complex     clause however it misses a comma something else and a dot?',
-         'A complex claus,e, however, it misses a comma, something else and a dot.']]
 )
 def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
     result = fix_commas(baseline_pipeline, s=test_input)

 @pytest.mark.parametrize(
     "test_input, expected",
     [
+        ['I, am.', 'I am.'],
+        ['A complex     clause however it misses a comma something else and a dot...?',
+         'A complex     clause, however, it misses a comma, something else and a dot...?']]
 )
 def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
     result = fix_commas(baseline_pipeline, s=test_input)

tests/test_integration.py CHANGED Viewed

@@ -1,34 +1,49 @@
-import json
 from app import app
-import pytest
-def test_fix_commas_fails_on_no_parameter():
-    response = app.test_client().post('/baseline/fix-commas/')
     assert response.status_code == 400
 @pytest.mark.parametrize(
     "test_input",
-    [[''],
-     ['Hello world.'],
-     ['This test string should not have any commas inside it.']]
 )
-def test_fix_commas_plain_string_unchanged(test_input: str):
-    response = app.test_client().post('/baseline/fix-commas/', data={'s': test_input})
-    print(response.data.decode('utf-8'))
-    # result = json.loads(response.data.decode('utf-8')).get('s')
     assert response.status_code == 200
-    # assert result == test_input
 @pytest.mark.parametrize(
     "test_input, expected",
-    [['', ''],
-     ['Hello world.', 'Hello world.'],
-     ['This test string should not have any commas inside it.',
-      'This test string should not have any commas inside it.']]
 )
-def test_fix_commas_fixes_wrong_commas(test_input: str, expected: str):
-    assert False

+from flask import json
+import pytest
 from app import app
+from baseline import create_baseline_pipeline
+@pytest.fixture()
+def client():
+    app.config["DEBUG"] = True
+    app.config["TESTING"] = True
+    app.baseline_pipeline = create_baseline_pipeline()
+    yield app.test_client()
+def test_fix_commas_fails_on_no_parameter(client):
+    response = client.post('/baseline/fix-commas/')
+    assert response.status_code == 400
+def test_fix_commas_fails_on_wrong_parameters(client):
+    response = client.post('/baseline/fix-commas/', json={'text': "Some text."})
     assert response.status_code == 400
 @pytest.mark.parametrize(
     "test_input",
+    ['',
+     'Hello world.',
+     'This test string should not have any commas inside it.']
 )
+def test_fix_commas_plain_string_unchanged(client, test_input: str):
+    response = client.post('/baseline/fix-commas/', json={'s': test_input})
     assert response.status_code == 200
+    assert response.get_json().get('s') == test_input
 @pytest.mark.parametrize(
     "test_input, expected",
+    [['I am, here.', 'I am here.'],
+     ['books pens and pencils',
+      'books, pens and pencils.']]
 )
+def test_fix_commas_fixes_wrong_commas(client, test_input: str, expected: str):
+    response = client.post('/baseline/fix-commas/', json={'s': test_input})
+    assert response.status_code == 200
+    assert response.get_json().get('s') == expected