Spaces:
Runtime error
Runtime error
DeDeckerThomas
committed on
Commit
·
860c19c
1
Parent(s):
24f3280
Fix empty keyphrase bug
Browse files
extraction/keyphrase_extraction_pipeline.py
DELETED
|
@@ -1,24 +0,0 @@
|
|
| 1 |
-
from transformers import (
|
| 2 |
-
TokenClassificationPipeline,
|
| 3 |
-
AutoModelForTokenClassification,
|
| 4 |
-
AutoTokenizer,
|
| 5 |
-
)
|
| 6 |
-
from transformers.pipelines import AggregationStrategy
|
| 7 |
-
import numpy as np
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
| 11 |
-
def __init__(self, model, *args, **kwargs):
|
| 12 |
-
super().__init__(
|
| 13 |
-
model=AutoModelForTokenClassification.from_pretrained(model),
|
| 14 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
| 15 |
-
*args,
|
| 16 |
-
**kwargs
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
def postprocess(self, model_outputs):
|
| 20 |
-
results = super().postprocess(
|
| 21 |
-
model_outputs=model_outputs,
|
| 22 |
-
aggregation_strategy=AggregationStrategy.SIMPLE,
|
| 23 |
-
)
|
| 24 |
-
return np.unique([result.get("word").strip() for result in results])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extraction/keyphrase_generation_pipeline.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
from transformers import (
|
| 2 |
-
Text2TextGenerationPipeline,
|
| 3 |
-
AutoModelForSeq2SeqLM,
|
| 4 |
-
AutoTokenizer,
|
| 5 |
-
)
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
| 9 |
-
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
| 10 |
-
super().__init__(
|
| 11 |
-
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
| 12 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
| 13 |
-
*args,
|
| 14 |
-
**kwargs
|
| 15 |
-
)
|
| 16 |
-
self.keyphrase_sep_token = keyphrase_sep_token
|
| 17 |
-
|
| 18 |
-
def postprocess(self, model_outputs):
|
| 19 |
-
results = super().postprocess(model_outputs=model_outputs)
|
| 20 |
-
return [
|
| 21 |
-
[
|
| 22 |
-
keyphrase.strip()
|
| 23 |
-
for keyphrase in result.get("generated_text").split(
|
| 24 |
-
self.keyphrase_sep_token
|
| 25 |
-
)
|
| 26 |
-
]
|
| 27 |
-
for result in results
|
| 28 |
-
][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
|
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
|
|
|
pipelines/keyphrase_generation_pipeline.py
CHANGED
|
@@ -24,6 +24,7 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
|
| 24 |
for keyphrase in result.get("generated_text").split(
|
| 25 |
self.keyphrase_sep_token
|
| 26 |
)
|
|
|
|
| 27 |
]
|
| 28 |
for result in results
|
| 29 |
][0]
|
|
|
|
| 24 |
for keyphrase in result.get("generated_text").split(
|
| 25 |
self.keyphrase_sep_token
|
| 26 |
)
|
| 27 |
+
if keyphrase != ""
|
| 28 |
]
|
| 29 |
for result in results
|
| 30 |
][0]
|