Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import pandas as pd
|
|
| 5 |
|
| 6 |
from classifier import classify
|
| 7 |
from statistics import mean
|
| 8 |
-
from genra_incremental import GenraPipeline
|
| 9 |
from qa_summary import generate_answer
|
| 10 |
|
| 11 |
|
|
@@ -88,51 +88,51 @@ def add_query(to_add, history):
|
|
| 88 |
history.append(to_add)
|
| 89 |
return gr.CheckboxGroup(choices=history), history
|
| 90 |
|
| 91 |
-
def qa_process(selected_queries, qa_llm_model, aggregator,
|
| 92 |
-
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
-
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
|
| 135 |
-
|
| 136 |
|
| 137 |
def qa_summarise(selected_queries, qa_llm_model, text_field, data_df):
|
| 138 |
|
|
|
|
| 5 |
|
| 6 |
from classifier import classify
|
| 7 |
from statistics import mean
|
| 8 |
+
# from genra_incremental import GenraPipeline
|
| 9 |
from qa_summary import generate_answer
|
| 10 |
|
| 11 |
|
|
|
|
| 88 |
history.append(to_add)
|
| 89 |
return gr.CheckboxGroup(choices=history), history
|
| 90 |
|
| 91 |
+
# def qa_process(selected_queries, qa_llm_model, aggregator,
|
| 92 |
+
# batch_size, topk, text_field, data_df):
|
| 93 |
|
| 94 |
+
# emb_model = 'multi-qa-mpnet-base-dot-v1'
|
| 95 |
+
# contexts = []
|
| 96 |
|
| 97 |
+
# queries_df = pd.DataFrame({'id':[j for j in range(len(selected_queries))],'query': selected_queries})
|
| 98 |
+
# qa_input_df = data_df[data_df["model_label"] != "none"].reset_index()
|
| 99 |
|
| 100 |
+
# tweets_df = qa_input_df[[text_field]]
|
| 101 |
+
# tweets_df.reset_index(inplace=True)
|
| 102 |
+
# tweets_df.rename(columns={"index": "order", text_field: "text"},inplace=True)
|
| 103 |
|
| 104 |
+
# gr.Info("Loading GENRA pipeline....")
|
| 105 |
+
# genra = GenraPipeline(qa_llm_model, emb_model, aggregator, contexts)
|
| 106 |
+
# gr.Info("Waiting for data...")
|
| 107 |
+
# batches = [tweets_df[i:i+batch_size] for i in range(0,len(tweets_df),batch_size)]
|
| 108 |
|
| 109 |
+
# genra_answers = []
|
| 110 |
+
# summarize_batch = True
|
| 111 |
+
# for batch_number, tweets in enumerate(batches):
|
| 112 |
+
# gr.Info(f"Populating index for batch {batch_number}")
|
| 113 |
+
# genra.qa_indexer.index_dataframe(tweets)
|
| 114 |
+
# gr.Info(f"Performing retrieval for batch {batch_number}")
|
| 115 |
+
# genra.retrieval(batch_number, queries_df, topk, summarize_batch)
|
| 116 |
|
| 117 |
+
# gr.Info("Processed all batches!")
|
| 118 |
|
| 119 |
+
# gr.Info("Getting summary...")
|
| 120 |
+
# summary = genra.summarize_history(queries_df)
|
| 121 |
|
| 122 |
+
# gr.Info("Preparing results...")
|
| 123 |
+
# results = genra.answers_store
|
| 124 |
+
# final_answers, q_a = [], []
|
| 125 |
+
# for q, g_answers in results.items():
|
| 126 |
+
# for answer in g_answers:
|
| 127 |
+
# final_answers.append({'question':q, "tweets":answer['tweets'], "batch":answer['batch_number'], "summary":answer['summary'] })
|
| 128 |
+
# for t in answer['tweets']:
|
| 129 |
+
# q_a.append((q,t))
|
| 130 |
+
# answers_df = pd.DataFrame.from_dict(final_answers)
|
| 131 |
+
# q_a = list(set(q_a))
|
| 132 |
+
# q_a_df = pd.DataFrame(q_a, columns =['question', 'tweet'])
|
| 133 |
+
# q_a_df = q_a_df.sort_values(by=["question"], ascending=False)
|
| 134 |
|
| 135 |
+
# return q_a_df, answers_df, summary
|
| 136 |
|
| 137 |
def qa_summarise(selected_queries, qa_llm_model, text_field, data_df):
|
| 138 |
|