Spaces:
Running
Running
Improve the presentation of outputs in frontend
Browse files
- README.md +2 -1
- commafixer/src/baseline.py +1 -1
- commafixer/src/fixer.py +3 -3
- notebooks/evaluation.ipynb +2 -1
- static/index.html +2 -1
- static/script.js +4 -2
README.md
CHANGED
|
@@ -113,7 +113,8 @@ In our approach, for each comma from the prediction text obtained from the model
|
|
| 113 |
* If a comma from ground truth is not predicted, it counts as a false negative.
|
| 114 |
|
| 115 |
## Training
|
| 116 |
-
The fine-tuned model
|
|
|
|
| 117 |
|
| 118 |
To compare with the baseline, we fine-tune the same model, RoBERTa large, on the wikitext English dataset.
|
| 119 |
We use a similar approach, where we treat comma-fixing as a NER problem, and for each token predict whether a comma
|
|
|
|
| 113 |
* If a comma from ground truth is not predicted, it counts as a false negative.
|
| 114 |
|
| 115 |
## Training
|
| 116 |
+
The fine-tuned model is available as [klasocki/roberta-large-lora-ner-comma-fixer](https://huggingface.co/klasocki/roberta-large-lora-ner-comma-fixer).
|
| 117 |
+
Further description can be found in the model card.
|
| 118 |
|
| 119 |
To compare with the baseline, we fine-tune the same model, RoBERTa large, on the wikitext English dataset.
|
| 120 |
We use a similar approach, where we treat comma-fixing as a NER problem, and for each token predict whether a comma
|
commafixer/src/baseline.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
|
| 2 |
import re
|
| 3 |
|
| 4 |
-
from
|
| 5 |
|
| 6 |
|
| 7 |
class BaselineCommaFixer(CommaFixerInterface):
|
|
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
|
| 2 |
import re
|
| 3 |
|
| 4 |
+
from comma_fixer_interface import CommaFixerInterface
|
| 5 |
|
| 6 |
|
| 7 |
class BaselineCommaFixer(CommaFixerInterface):
|
commafixer/src/fixer.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
from peft import PeftConfig, PeftModel
|
| 2 |
-
from transformers import AutoTokenizer, AutoModelForTokenClassification,
|
| 3 |
import nltk
|
| 4 |
import re
|
| 5 |
|
| 6 |
-
from
|
| 7 |
|
| 8 |
|
| 9 |
class CommaFixer(CommaFixerInterface):
|
|
@@ -11,7 +11,7 @@ class CommaFixer(CommaFixerInterface):
|
|
| 11 |
A wrapper class for the fine-tuned comma fixer model.
|
| 12 |
"""
|
| 13 |
|
| 14 |
-
def __init__(self
|
| 15 |
self.id2label = {0: 'O', 1: 'B-COMMA'}
|
| 16 |
self.label2id = {'O': 0, 'B-COMMA': 1}
|
| 17 |
self.model, self.tokenizer = self._load_peft_model()
|
|
|
|
| 1 |
from peft import PeftConfig, PeftModel
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification, RobertaTokenizerFast
|
| 3 |
import nltk
|
| 4 |
import re
|
| 5 |
|
| 6 |
+
from comma_fixer_interface import CommaFixerInterface
|
| 7 |
|
| 8 |
|
| 9 |
class CommaFixer(CommaFixerInterface):
|
|
|
|
| 11 |
A wrapper class for the fine-tuned comma fixer model.
|
| 12 |
"""
|
| 13 |
|
| 14 |
+
def __init__(self):
|
| 15 |
self.id2label = {0: 'O', 1: 'B-COMMA'}
|
| 16 |
self.label2id = {'O': 0, 'B-COMMA': 1}
|
| 17 |
self.model, self.tokenizer = self._load_peft_model()
|
notebooks/evaluation.ipynb
CHANGED
|
@@ -3281,7 +3281,8 @@
|
|
| 3281 |
{
|
| 3282 |
"cell_type": "code",
|
| 3283 |
"source": [
|
| 3284 |
-
"# comma_fixer.model = comma_fixer.model.cuda()"
|
|
|
|
| 3285 |
],
|
| 3286 |
"metadata": {
|
| 3287 |
"id": "ePP_WzS7XeYC"
|
|
|
|
| 3281 |
{
|
| 3282 |
"cell_type": "code",
|
| 3283 |
"source": [
|
| 3284 |
+
"# comma_fixer.model = comma_fixer.model.cuda() # TODO make this work and evaluate on test in the notebook as well. In\n",
|
| 3285 |
+
"# training eval on test was ~ same F1"
|
| 3286 |
],
|
| 3287 |
"metadata": {
|
| 3288 |
"id": "ePP_WzS7XeYC"
|
static/index.html
CHANGED
|
@@ -37,7 +37,8 @@
|
|
| 37 |
value="This is however a very bad, and terrible sentence grammatically that is."
|
| 38 |
/>
|
| 39 |
<button id="comma-fixing-submit">Submit</button>
|
| 40 |
-
<p class="comma-fixing-output"></p>
|
|
|
|
| 41 |
</form>
|
| 42 |
</section>
|
| 43 |
</main>
|
|
|
|
| 37 |
value="This is however a very bad, and terrible sentence grammatically that is."
|
| 38 |
/>
|
| 39 |
<button id="comma-fixing-submit">Submit</button>
|
| 40 |
+
<p class="comma-fixing-main-output"></p>
|
| 41 |
+
<p class="comma-fixing-baseline-output"></p>
|
| 42 |
</form>
|
| 43 |
</section>
|
| 44 |
</main>
|
static/script.js
CHANGED
|
@@ -22,9 +22,11 @@ commaFixingForm.addEventListener("submit", async (event) => {
|
|
| 22 |
event.preventDefault();
|
| 23 |
|
| 24 |
const commaFixingInput = document.getElementById("comma-fixing-input");
|
| 25 |
-
const commaFixingParagraph = document.querySelector(".comma-fixing-output");
|
|
|
|
| 26 |
|
| 27 |
const fixed = await fixCommas(commaFixingInput.value);
|
| 28 |
|
| 29 |
-
commaFixingParagraph.textContent = `Our model: ${fixed.main}
|
|
|
|
| 30 |
});
|
|
|
|
| 22 |
event.preventDefault();
|
| 23 |
|
| 24 |
const commaFixingInput = document.getElementById("comma-fixing-input");
|
| 25 |
+
const commaFixingParagraph = document.querySelector(".comma-fixing-main-output");
|
| 26 |
+
const commaFixingBaselineParagraph = document.querySelector(".comma-fixing-baseline-output");
|
| 27 |
|
| 28 |
const fixed = await fixCommas(commaFixingInput.value);
|
| 29 |
|
| 30 |
+
commaFixingParagraph.textContent = `Our model: ${fixed.main}`
|
| 31 |
+
commaFixingBaselineParagraph.textContent = `Baseline model: ${fixed.baseline}`
|
| 32 |
});
|