Spaces:
Running
Running
kiyer
committed on
Commit
·
7d0b0c1
1
Parent(s):
793347c
try fix for index issue
Browse files
app.py
CHANGED
|
@@ -243,28 +243,28 @@ class RetrievalSystem():
|
|
| 243 |
query_embedding,
|
| 244 |
rerank_top_k,
|
| 245 |
return_scores = False)
|
| 246 |
-
try:
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
except:
|
| 267 |
-
print('heavy load, please wait 10s and try again.')
|
| 268 |
else:
|
| 269 |
top_results, small_df = self.rank_and_filter(query,
|
| 270 |
query_embedding,
|
|
@@ -278,6 +278,8 @@ class RetrievalSystem():
|
|
| 278 |
df = pd.DataFrame(small_df)
|
| 279 |
df = df.drop(columns=['umap_x','umap_y','cite_bibcodes','ref_bibcodes'])
|
| 280 |
links = ['https://ui.adsabs.harvard.edu/abs/'+i+'/abstract' for i in small_df['bibcode']]
|
|
|
|
|
|
|
| 281 |
scores = [top_results[i] for i in top_results]
|
| 282 |
indices = [i for i in top_results]
|
| 283 |
df.insert(1,'ADS Link',links,True)
|
|
@@ -477,7 +479,7 @@ def run_agent_qa(query):
|
|
| 477 |
|
| 478 |
def run_rag_qa(query, papers_df):
|
| 479 |
|
| 480 |
-
try:
|
| 481 |
loaders = []
|
| 482 |
|
| 483 |
documents = []
|
|
@@ -497,6 +499,8 @@ def run_rag_qa(query, papers_df):
|
|
| 497 |
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6, "fetch_k": len(splits)})
|
| 498 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
|
| 499 |
|
|
|
|
|
|
|
| 500 |
if st.session_state.question_type == 'Bibliometric':
|
| 501 |
template = bibliometric_prompt
|
| 502 |
elif st.session_state.question_type == 'Single-paper':
|
|
@@ -523,10 +527,10 @@ def run_rag_qa(query, papers_df):
|
|
| 523 |
rag_answer = rag_chain_with_source.invoke(query, )
|
| 524 |
vectorstore.delete_collection()
|
| 525 |
|
| 526 |
-
except:
|
| 527 |
-
|
| 528 |
|
| 529 |
-
|
| 530 |
|
| 531 |
def guess_question_type(query: str):
|
| 532 |
|
|
|
|
| 243 |
query_embedding,
|
| 244 |
rerank_top_k,
|
| 245 |
return_scores = False)
|
| 246 |
+
# try:
|
| 247 |
+
docs_for_rerank = [small_df['abstract'][i] for i in range(rerank_top_k)]
|
| 248 |
+
if len(docs_for_rerank) == 0:
|
| 249 |
+
return []
|
| 250 |
+
reranked_results = self.cohere_client.rerank(
|
| 251 |
+
query=query,
|
| 252 |
+
documents=docs_for_rerank,
|
| 253 |
+
model='rerank-english-v3.0',
|
| 254 |
+
top_n=top_k
|
| 255 |
+
)
|
| 256 |
+
final_results = []
|
| 257 |
+
for result in reranked_results.results:
|
| 258 |
+
doc_id = top_results[result.index]
|
| 259 |
+
doc_text = docs_for_rerank[result.index]
|
| 260 |
+
score = float(result.relevance_score)
|
| 261 |
+
final_results.append([doc_id, "", score])
|
| 262 |
+
final_indices = [doc[0] for doc in final_results]
|
| 263 |
+
if return_scores:
|
| 264 |
+
return {result[0]: result[2] for result in final_results}, self.dataset[final_indices]
|
| 265 |
+
return [doc[0] for doc in final_results], self.dataset[final_indices]
|
| 266 |
+
# except:
|
| 267 |
+
# print('heavy load, please wait 10s and try again.')
|
| 268 |
else:
|
| 269 |
top_results, small_df = self.rank_and_filter(query,
|
| 270 |
query_embedding,
|
|
|
|
| 278 |
df = pd.DataFrame(small_df)
|
| 279 |
df = df.drop(columns=['umap_x','umap_y','cite_bibcodes','ref_bibcodes'])
|
| 280 |
links = ['https://ui.adsabs.harvard.edu/abs/'+i+'/abstract' for i in small_df['bibcode']]
|
| 281 |
+
|
| 282 |
+
# st.write(top_results[0:10])
|
| 283 |
scores = [top_results[i] for i in top_results]
|
| 284 |
indices = [i for i in top_results]
|
| 285 |
df.insert(1,'ADS Link',links,True)
|
|
|
|
| 479 |
|
| 480 |
def run_rag_qa(query, papers_df):
|
| 481 |
|
| 482 |
+
# try:
|
| 483 |
loaders = []
|
| 484 |
|
| 485 |
documents = []
|
|
|
|
| 499 |
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6, "fetch_k": len(splits)})
|
| 500 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
|
| 501 |
|
| 502 |
+
|
| 503 |
+
|
| 504 |
if st.session_state.question_type == 'Bibliometric':
|
| 505 |
template = bibliometric_prompt
|
| 506 |
elif st.session_state.question_type == 'Single-paper':
|
|
|
|
| 527 |
rag_answer = rag_chain_with_source.invoke(query, )
|
| 528 |
vectorstore.delete_collection()
|
| 529 |
|
| 530 |
+
# except:
|
| 531 |
+
# st.subheader('heavy load! please wait 10 seconds and try again.')
|
| 532 |
|
| 533 |
+
return rag_answer
|
| 534 |
|
| 535 |
def guess_question_type(query: str):
|
| 536 |
|