Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from transformers import pipeline | |
| from ipymarkup import format_span_box_markup | |
# Streamlit re-executes this whole script on every widget interaction, so the
# pipelines are built through a cached factory instead of being reloaded from
# disk on each rerun. `st.cache_resource` is the current API; releases that
# predate it expose the same behaviour as `st.experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def _load_ner_pipeline(model_dir):
    """Build the "ner" pipeline for the checkpoint stored at *model_dir*.

    Both checkpoints are paired with the `indolem/indobert-base-uncased`
    tokenizer. The result is cached per *model_dir*, so each checkpoint is
    loaded only once per server process.
    """
    return pipeline("ner", model=model_dir, tokenizer="indolem/indobert-base-uncased")


# Load the pre-trained NER models
model = _load_ner_pipeline("/home/user/app/mendobert/")
basemodel = _load_ner_pipeline("/home/user/app/base-model/")

st.title(':blue[MendoBERT] - Named Entity Recognition :sunglasses:')

# 'options' is the session-state key that holds the current input text; seed it
# once so it can be read and overwritten before any text has been entered.
if 'options' not in st.session_state:
    st.session_state['options'] = ""
def button1_callback():
    """Store the first example sentence (AGU) under the 'options' session key."""
    example = "Aspartylglucosaminuria (AGU) adalah gangguan metabolisme glikoprotein langka."
    st.session_state.options = example
def button2_callback():
    """Store the second example sentence (BRCA1) under the 'options' session key."""
    example = "Mutasi germ - line dari gen BRCA1 membuat wanita cenderung mengalami kanker payudara dini dengan mengorbankan fungsi presumtif gen sebagai penekan tumor."
    st.session_state.options = example
# Prefix that marks a token as the continuation of the previous word.
_SUBWORD_PREFIX = "##"

# Map the pipeline's LABEL_n ids to the tags shown in the UI.
_TAG_BY_LABEL = {"LABEL_0": "O", "LABEL_1": "B", "LABEL_2": "I"}


def _merge_wordpieces(token_predictions):
    """Merge "##"-prefixed continuation tokens into the word they belong to.

    Args:
        token_predictions: iterable of dicts with the keys "word", "start",
            "entity", "index", "score" and, optionally, "end".

    Returns:
        A list with one dict per whole word. "start", "entity", "index" and
        "score" come from the word's first piece; "end" and "word" are
        extended by every continuation piece.
    """
    words = []
    for token in token_predictions:
        raw = token["word"]
        is_continuation = raw.startswith(_SUBWORD_PREFIX)
        # Strip the marker only where it is a prefix, not anywhere in the token.
        piece = raw[len(_SUBWORD_PREFIX):] if is_continuation else raw

        # Prefer the character offset reported with the token: the token text
        # can differ in length from the span it covers in the input, which
        # would make `start + len(piece)` point at the wrong character.
        end = token.get("end")
        if end is None:
            end = token["start"] + len(piece)

        if is_continuation and words:
            words[-1]["end"] = end
            words[-1]["word"] += piece
        else:
            # A continuation with nothing before it starts a new word instead
            # of indexing into an empty list.
            words.append({
                'start': token["start"],
                'end': end,
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': piece,
            })
    return words


def _tag_entities(ner_pipeline, text):
    """Run *ner_pipeline* on *text* and describe every word not tagged "O".

    Returns:
        A ``(summaries, spans)`` pair: *summaries* is a list of human-readable
        strings, *spans* a list of ``(start, end, tag)`` tuples for the
        highlighted markup.
    """
    summaries = []
    spans = []
    for word in _merge_wordpieces(ner_pipeline(text)):
        tag = _TAG_BY_LABEL.get(word["entity"], word["entity"])
        if tag.startswith("O"):
            continue
        start, end, score = word["start"], word["end"], word["score"]
        spans.append((start, end, tag))
        summaries.append(
            f"Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}, score:{score}"
        )
    return summaries, spans


def _render_model_section(heading, text, summaries, spans):
    """Show one model's results: heading, summary list and highlighted text."""
    st.subheader(heading)
    st.json(summaries)
    # NOTE(review): the markup is rendered as raw HTML and contains the user's
    # text; this relies on ipymarkup escaping it - confirm.
    st.markdown(''.join(format_span_box_markup(text, spans)), unsafe_allow_html=True)


# Reserve the slot for the text area first so it is drawn above the example
# buttons even though the buttons are created before it.
placeholder = st.empty()

st.caption('_Examples_')
st.button('Aspartylglucosaminuria (AGU) adalah gangguan metabolisme glikoprotein langka.', use_container_width=True, on_click=button1_callback)
st.button('Mutasi germ - line dari gen BRCA1 membuat wanita cenderung mengalami kanker payudara dini dengan mengorbankan fungsi presumtif gen sebagai penekan tumor.', use_container_width=True, on_click=button2_callback)

with placeholder:
    text = st.text_area('Enter some text: ', key='options')

# Skip inference for empty or whitespace-only input.
if text and text.strip():
    mendo, spanMendo = _tag_entities(model, text)
    base, spanBase = _tag_entities(basemodel, text)

    _render_model_section('MendoBERT', text, mendo, spanMendo)
    _render_model_section('IndoLEM', text, base, spanBase)

    # NOTE(review): the original indentation was lost; the legend is assumed
    # to belong to the results view - confirm.
    st.write("\n")
    st.info("'B' means Beginning of an entity, 'I' means Inside of an entity", icon="ℹ️")

st.write("\n\n")