Spaces:
Runtime error
Runtime error
Commit
·
3cf2a36
1
Parent(s):
58ef0b0
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,74 +5,75 @@ from transformers import pipeline
|
|
| 5 |
model = pipeline("ner", model="/ner-app/mendobert/", tokenizer="indolem/indobert-base-uncased")
|
| 6 |
basemodel = pipeline("ner", model="/ner-app/base-model/", tokenizer="indolem/indobert-base-uncased")
|
| 7 |
|
| 8 |
-
|
| 9 |
-
ner_results2 = basemodel(text)
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# MendoBERT
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
if result["word"].startswith("##"):
|
| 19 |
-
formatted_results[-1]["end"] = end
|
| 20 |
-
formatted_results[-1]["word"]+= result["word"].replace("##", "")
|
| 21 |
-
else:
|
| 22 |
-
formatted_results.append({
|
| 23 |
-
'start': result["start"],
|
| 24 |
-
'end': end,
|
| 25 |
-
'entity': result["entity"],
|
| 26 |
-
'index': result["index"],
|
| 27 |
-
'score': result["score"],
|
| 28 |
-
'word': result["word"]})
|
| 29 |
|
| 30 |
-
for result in formatted_results:
|
| 31 |
-
if result["entity"].startswith("LABEL_0"):
|
| 32 |
-
result["entity"] = "O"
|
| 33 |
-
elif result["entity"].startswith("LABEL_1"):
|
| 34 |
-
result["entity"] = "B"
|
| 35 |
-
elif result["entity"].startswith("LABEL_2"):
|
| 36 |
-
result["entity"] = "I"
|
| 37 |
-
|
| 38 |
-
mendo =[]
|
| 39 |
-
for result in formatted_results:
|
| 40 |
-
if not result["entity"].startswith("O"):
|
| 41 |
-
mendo.append(f"""Entity: {result["entity"]}, Start:{result["start"]}, End:{result["end"]}, word:{text[result["start"]:result["end"]]}""")
|
| 42 |
-
|
| 43 |
-
# Base Model
|
| 44 |
-
|
| 45 |
-
formatted_results = []
|
| 46 |
-
for result in ner_results2:
|
| 47 |
-
end = result["start"]+len(result["word"].replace("##", ""))
|
| 48 |
-
|
| 49 |
-
if result["word"].startswith("##"):
|
| 50 |
-
formatted_results[-1]["end"] = end
|
| 51 |
-
formatted_results[-1]["word"]+= result["word"].replace("##", "")
|
| 52 |
-
else:
|
| 53 |
-
formatted_results.append({
|
| 54 |
-
'start': result["start"],
|
| 55 |
-
'end': end,
|
| 56 |
-
'entity': result["entity"],
|
| 57 |
-
'index': result["index"],
|
| 58 |
-
'score': result["score"],
|
| 59 |
-
'word': result["word"]})
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
if text:
|
| 77 |
st.json(base)
|
| 78 |
-
st.json(mendo)
|
|
|
|
|
|
| 5 |
# Raw classifier label prefixes and the B/I/O tag shown to the user for each.
# Prefixes are tested in order with str.startswith, mirroring the original
# if/elif chain; labels matching none of them are left unchanged.
_LABEL_PREFIX_TO_TAG = (
    ("LABEL_0", "O"),
    ("LABEL_1", "B"),
    ("LABEL_2", "I"),
)


def _merge_wordpieces(token_results):
    """Fold "##" continuation pieces into the entry of the preceding token.

    Args:
        token_results: iterable of per-token dicts exposing the keys
            "word", "start", "entity", "index" and "score".

    Returns:
        A list of new dicts with keys "start", "end", "entity", "index",
        "score" and "word". "end" is computed as the token's start plus the
        length of its text with "##" removed; a continuation piece extends
        the "end" and "word" of the previous entry and keeps that entry's
        entity, index and score.
    """
    # NOTE(review): the pipeline output may already carry an "end" offset;
    # the computed value is kept here so the displayed spans do not change.
    merged = []
    for token in token_results:
        piece = token["word"].replace("##", "")
        end = token["start"] + len(piece)

        # `and merged` guards against a continuation piece arriving first,
        # which previously raised IndexError on merged[-1]; such a piece is
        # now kept as its own entry.
        if token["word"].startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += piece
        else:
            merged.append({
                'start': token["start"],
                'end': end,
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': token["word"],
            })
    return merged


def _to_bio_tag(entity):
    """Return the B/I/O tag for a raw label, or the label itself if unmapped."""
    for prefix, tag in _LABEL_PREFIX_TO_TAG:
        if entity.startswith(prefix):
            return tag
    return entity


def _describe_entities(token_results, text):
    """Build one display string per merged token whose tag does not start with "O".

    Args:
        token_results: per-token dicts as accepted by _merge_wordpieces.
        text: the input string the offsets refer to; it is sliced with the
            merged start/end to show the matched surface form.

    Returns:
        A list of strings of the form
        "Entity: <tag>, Start:<start>, End:<end>, word:<text[start:end]>".
    """
    described = []
    for entry in _merge_wordpieces(token_results):
        tag = _to_bio_tag(entry["entity"])
        if tag.startswith("O"):
            continue
        start, end = entry["start"], entry["end"]
        described.append(f"""Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}""")
    return described


# NOTE(review): both pipelines are built at module level, so they are
# presumably re-created on every Streamlit rerun - confirm whether caching
# is wanted.
model = pipeline("ner", model="/ner-app/mendobert/", tokenizer="indolem/indobert-base-uncased")
basemodel = pipeline("ner", model="/ner-app/base-model/", tokenizer="indolem/indobert-base-uncased")

text = st.text_area('enter some text: ')

# Only run the models once the user has entered something.
if text:
    ner_results = model(text)
    ner_results2 = basemodel(text)

    # MendoBERT
    mendo = _describe_entities(ner_results, text)

    # Base Model
    base = _describe_entities(ner_results2, text)

    # Base model output is shown first, then MendoBERT, as before.
    st.json(base)
    st.json(mendo)
|
| 79 |
+
|