Spaces:
Runtime error
Runtime error
integrate summarization pipeline
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import torch
|
|
| 2 |
import nltk
|
| 3 |
import validators
|
| 4 |
import streamlit as st
|
| 5 |
-
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
| 6 |
|
| 7 |
# local modules
|
| 8 |
from extractive_summarizer.model_processors import Summarizer
|
|
@@ -12,12 +12,12 @@ from src.abstractive_summarizer import (
|
|
| 12 |
preprocess_text_for_abstractive_summarization,
|
| 13 |
)
|
| 14 |
|
| 15 |
-
# abstractive summarizer model
|
| 16 |
-
@st.cache()
|
| 17 |
-
def load_abs_model():
|
| 18 |
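-
tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
| 19 |
-
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
| 20 |
-
return tokenizer, model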
|
| 21 |
|
| 22 |
|
| 23 |
if __name__ == "__main__":
|
|
@@ -28,7 +28,13 @@ if __name__ == "__main__":
|
|
| 28 |
summarize_type = st.sidebar.selectbox(
|
| 29 |
"Summarization type", options=["Extractive", "Abstractive"]
|
| 30 |
)
|
|
|
|
|
|
|
| 31 |
nltk.download("punkt")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
inp_text = st.text_input("Enter text or a url here")
|
| 34 |
|
|
@@ -65,26 +71,15 @@ if __name__ == "__main__":
|
|
| 65 |
text="Creating abstractive summary. This might take a few seconds ..."
|
| 66 |
):
|
| 67 |
text_to_summarize = clean_txt
|
| 68 |
-
|
| 69 |
if not is_url:
|
| 70 |
# list of chunks
|
| 71 |
text_to_summarize = preprocess_text_for_abstractive_summarization(
|
| 72 |
tokenizer=abs_tokenizer, text=clean_txt
|
| 73 |
)
|
| 74 |
-
summarized_text = abstractive_summarizer(
|
| 75 |
-
abs_tokenizer, abs_model, text_to_summarize
|
| 76 |
-
)
|
| 77 |
|
| 78 |
-
|
| 79 |
-
# summarized_text = abstractive_summarizer(
|
| 80 |
-
# abs_tokenizer, abs_model, text_to_summarize
|
| 81 |
-
# )
|
| 82 |
-
# elif summarize_type == "Abstractive" and is_url:
|
| 83 |
-
# abs_url_summarizer = pipeline("summarization")
|
| 84 |
-
# tmp_sum = abs_url_summarizer(
|
| 85 |
-
# text_to_summarize, max_length=120, min_length=30, do_sample=False
|
| 86 |
-
# )
|
| 87 |
-
# summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
|
| 88 |
|
| 89 |
# final summarized output
|
| 90 |
st.subheader("Summarized text")
|
|
|
|
| 2 |
import nltk
|
| 3 |
import validators
|
| 4 |
import streamlit as st
|
| 5 |
+
from transformers import pipeline, T5Tokenizer
|
| 6 |
|
| 7 |
# local modules
|
| 8 |
from extractive_summarizer.model_processors import Summarizer
|
|
|
|
| 12 |
preprocess_text_for_abstractive_summarization,
|
| 13 |
)
|
| 14 |
|
| 15 |
+
# # abstractive summarizer model
|
| 16 |
+
# @st.cache()
|
| 17 |
+
# def load_abs_model():
|
| 18 |
+
# tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
| 19 |
+
# model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
| 20 |
+
# return tokenizer, model
|
| 21 |
|
| 22 |
|
| 23 |
if __name__ == "__main__":
|
|
|
|
| 28 |
summarize_type = st.sidebar.selectbox(
|
| 29 |
"Summarization type", options=["Extractive", "Abstractive"]
|
| 30 |
)
|
| 31 |
+
# ---------------------------
|
| 32 |
+
# SETUP
|
| 33 |
nltk.download("punkt")
|
| 34 |
+
abs_tokenizer_name = "t5-base"
|
| 35 |
+
abs_model_name = "t5-base"
|
| 36 |
+
abs_tokenizer = T5Tokenizer.from_pretrained(abs_tokenizer_name)
|
| 37 |
+
# ---------------------------
|
| 38 |
|
| 39 |
inp_text = st.text_input("Enter text or a url here")
|
| 40 |
|
|
|
|
| 71 |
text="Creating abstractive summary. This might take a few seconds ..."
|
| 72 |
):
|
| 73 |
text_to_summarize = clean_txt
|
| 74 |
+
abs_summarizer = pipeline("summarization")
|
| 75 |
if not is_url:
|
| 76 |
# list of chunks
|
| 77 |
text_to_summarize = preprocess_text_for_abstractive_summarization(
|
| 78 |
tokenizer=abs_tokenizer, text=clean_txt
|
| 79 |
)
|
| 80 |
+
tmp_sum = abs_summarizer(text_to_summarize, do_sample=False)
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
# final summarized output
|
| 85 |
st.subheader("Summarized text")
|