Spaces:

Dagobert42
/

Semantic-Frame-Augmentation

Sleeping

Dagobert42 committed on Feb 14, 2024

Commit

00a6def

1 Parent(s): 125cf0c

add color annotated results, use new models

Files changed (3) hide show

app.py CHANGED Viewed

@@ -1,41 +1,31 @@
 import torch
 import streamlit as st
-from transformers import pipeline
 from random import choice
 with open("sentences.pt", 'rb') as f:
     sentences = torch.load(f)
 sentence = choice(sentences)
-baseline_classifier = pipeline(
-    model="Dagobert42/mobilebert-uncased-biored-finetuned-ner",
-    task="ner",
-    aggregation_strategy="simple"
-    )
-augmented_classifier = pipeline(
-    model="Dagobert42/mobilebert-uncased-biored-augmented-ner",
-    task="ner",
-    aggregation_strategy="simple"
-    )
 st.title("Semantic Frame Augmentation")
-st.caption("Analysing difficult low-resource domains with only a handful of examples")
-st.write("This space uses a googel/mobilebert-uncased model for NER")
 augment = st.toggle('Use augmented model for NER', value=False)
-if augment:
-    st.write("with augmentation:")
-    tokens = augmented_classifier(sentence)
-else:
-    st.write("without augmentation:")
-    tokens = baseline_classifier(sentence)
 txt = st.text_area(
     "Text to analyze",
     sentence,
     max_chars=500
     )
 st.subheader("Entity analysis:")
-st.write(tokens)

 import torch
 import streamlit as st
 from random import choice
+from annotated_text import annotated_text
+from helpers import *
 with open("sentences.pt", 'rb') as f:
     sentences = torch.load(f)
 sentence = choice(sentences)
 st.title("Semantic Frame Augmentation")
+st.subheader("Analysing difficult low-resource domains with only a handful of examples")
+st.write("This space uses a google/mobilebert-uncased model for NER")
 augment = st.toggle('Use augmented model for NER', value=False)
 txt = st.text_area(
     "Text to analyze",
     sentence,
     max_chars=500
     )
+if augment:
+    st.write("with augmentation:")
+    tokens = augmented_classifier(txt)
+else:
+    st.write("without augmentation:")
+    tokens = baseline_classifier(txt)
 st.subheader("Entity analysis:")
+annotated_text(annotate_sentence(sentence, tokens))

helpers.py ADDED Viewed

+from transformers import pipeline
# Both classifiers are built with the same pipeline settings and differ only
# in the checkpoint they load. annotate_sentence reads the 'start', 'end',
# 'word' and 'entity_group' fields of the predictions they return.
_NER_OPTIONS = {"task": "ner", "aggregation_strategy": "simple"}

# Model fine-tuned without augmentation.
baseline_classifier = pipeline(model="Dagobert42/biored-finetuned", **_NER_OPTIONS)

# Model fine-tuned with augmentation.
augmented_classifier = pipeline(model="Dagobert42/biored-augmented", **_NER_OPTIONS)
def annotate_sentence(sentence, predictions):
    """Interleave plain text and coloured entity tuples for display.

    Args:
        sentence: The text the predictions were computed on; the 'start' and
            'end' offsets of each prediction are used to slice it.
        predictions: Iterable of dicts with the keys 'start', 'end', 'word'
            and 'entity_group', in the order they occur in the text.

    Returns:
        A list mixing plain ``str`` segments (text between entities) with
        ``(word, entity_group, color)`` tuples, one per prediction. Returns
        ``[sentence]`` when there are no predictions and ``[]`` when the
        sentence is empty as well.
    """
    colors = {
        'null': '#bfbfbf',  # Pastel gray
        'GeneOrGeneProduct': '#aad4aa',  # Pastel green
        'DiseaseOrPhenotypicFeature': '#f8b400',  # Pastel orange
        'ChemicalEntity': '#a4c2f4',  # Pastel blue
        'OrganismTaxon': '#ffb6c1',  # Pastel pink
        'SequenceVariant': '#e2b0ff',  # Pastel purple
        'CellLine': '#ffcc99'  # Pastel peach
    }
    output = []
    cursor = 0  # offset of the first character not yet emitted
    for p in predictions:
        gap = sentence[cursor:p['start']]
        if gap:
            output.append(gap)
        label = p['entity_group']
        # Labels missing from the palette fall back to gray instead of
        # aborting the whole rendering with a KeyError.
        output.append((p['word'], label, colors.get(label, colors['null'])))
        cursor = p['end']
    # Use the running cursor rather than the loop variable, which is
    # undefined when there are no predictions at all.
    tail = sentence[cursor:]
    if tail:
        output.append(tail)
    return output

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 streamlit
 transformers
 torch

 streamlit
+st-annotated-text
 transformers
 torch