trying to make things a bit faster
app.py CHANGED
@@ -131,6 +131,10 @@ if 'ids' not in st.session_state:
     st.session_state.cites = arxiv_corpus['cites']
     st.session_state.years = arxiv_corpus['date']
     st.session_state.kws = arxiv_corpus['keywords']
+    st.session_state.ads_kws = arxiv_corpus['ads_keywords']
+    st.session_state.bibcode = arxiv_corpus['bibcode']
+    st.session_state.umap_x = arxiv_corpus['umap_x']
+    st.session_state.umap_y = arxiv_corpus['umap_y']
     st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
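
The added lines cache the bibcodes, ADS keywords, and UMAP coordinates alongside the columns already copied into st.session_state, so the widgets further down can read them from memory on each Streamlit rerun. A minimal sketch of that caching pattern, assuming a datasets-style corpus object; the helper name and the exact column list are illustrative, not code from app.py:

import time
import streamlit as st

def cache_corpus_columns(arxiv_corpus):
    # Illustrative helper: copy the columns the UI reads on every rerun into
    # st.session_state once, so later reruns skip the dataset lookups.
    if 'bibcode' not in st.session_state:
        ts = time.time()
        for col in ('cites', 'keywords', 'ads_keywords',
                    'bibcode', 'umap_x', 'umap_y'):
            st.session_state[col] = arxiv_corpus[col]
        st.session_state['years'] = arxiv_corpus['date']
        st.toast('done caching. time taken: %.2f sec' % (time.time() - ts))
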
@@ -194,7 +198,7 @@ class EmbeddingRetrievalSystem():
         self.abstract = st.session_state.abstracts
         self.client = OpenAI(api_key = openai_key)
         self.embed_model = "text-embedding-3-small"
-        self.dataset = arxiv_corpus
+        self.dataset = st.session_state.arxiv_corpus
         self.kws = st.session_state.kws
         self.cites = st.session_state.cites
@@ -495,7 +499,7 @@ def Library(query, top_k = 7):
     rs = get_topk(query, top_k = top_k)
     op_docs = ''
     for paperno, i in enumerate(rs):
-        op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.…
+        op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.bibcode[i][0:4] + ') ' + st.session_state.titles[i] + '\n' + st.session_state.abstracts[i] + '\n\n'

     return op_docs
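
The bibcode[i][0:4] slice in the new line works because ADS bibcodes start with the four-digit publication year, so the first four characters are the year string. A one-line illustration (the bibcode value is made up):

bibcode = '2017ApJ...836..104X'   # illustrative ADS bibcode: year + journal + volume + page + author initial
year = bibcode[0:4]               # '2017'
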
@@ -504,7 +508,7 @@ def Library2(query, top_k = 7):
     absts, fnames = [], []
     for paperno, i in enumerate(rs):
         absts.append(st.session_state.abstracts[i])
-        fnames.append(st.session_state.…
+        fnames.append(st.session_state.bibcode[i])
     return absts, fnames, rs

 def get_paper_df(ids):
@@ -513,10 +517,10 @@ def get_paper_df(ids):
     for i in ids:
         papers.append(st.session_state.titles[i])
         scores.append(ids[i])
-        links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.…
-        yrs.append(st.session_state.…
-        cites.append(st.session_state.…
-        kws.append(st.session_state.…
+        links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.bibcode[i]+'/abstract')
+        yrs.append(st.session_state.bibcode[i][0:4])
+        cites.append(st.session_state.cites[i])
+        kws.append(st.session_state.ads_kws[i])

     return pd.DataFrame({
         'Title': papers,
@@ -587,10 +591,10 @@ def create_embedding_plot(rs):
    """

    pltsource = ColumnDataSource(data=dict(
-       x=st.session_state.…
-       y=st.session_state.…
+       x=st.session_state.umap_x,
+       y=st.session_state.umap_y,
        title=st.session_state.titles,
-       link=st.session_state.…
+       link=st.session_state.bibcode,
    ))

    rsflag = np.zeros((len(st.session_state.ids),))
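
With umap_x, umap_y, and bibcode cached in st.session_state, the ColumnDataSource above is built entirely from memory. A stripped-down sketch of feeding such a source into a Bokeh scatter rendered by Streamlit; the figure tools, glyph styling, and tooltip fields are assumptions, not app.py's exact plot code:

from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
import streamlit as st

source = ColumnDataSource(data=dict(
    x=st.session_state.umap_x,
    y=st.session_state.umap_y,
    title=st.session_state.titles,
    link=st.session_state.bibcode,
))
p = figure(tools='pan,wheel_zoom,reset')              # interactive embedding map
p.circle('x', 'y', source=source, size=3, alpha=0.5)  # one point per paper
p.add_tools(HoverTool(tooltips=[('title', '@title'), ('bibcode', '@link')]))
st.bokeh_chart(p)                                     # same Streamlit call the app uses to render the plot
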
@@ -995,8 +999,8 @@ def main():
            column_config = {'ADS Link':st.column_config.LinkColumn(display_text= 'https://ui.adsabs.harvard.edu/abs/(.*?)/abstract')}
        )

-       with st.expander("Embedding map", expanded=False):
-           …
+       # with st.expander("Embedding map", expanded=False):
+       st.bokeh_chart(embedding_plot)

        col1, col2 = st.columns(2)
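
The LinkColumn config in the context line above pairs with the ADS URLs built in get_paper_df: because display_text is a regex with a capture group, each table cell shows only the captured bibcode while still linking to the abstract page. A small sketch of the same pattern with made-up table contents:

import pandas as pd
import streamlit as st

df = pd.DataFrame({
    'Title': ['An example paper'],
    'ADS Link': ['https://ui.adsabs.harvard.edu/abs/2017ApJ...836..104X/abstract'],
})
st.dataframe(
    df,
    column_config={
        # the capture group (.*?) makes the cell display just the bibcode
        'ADS Link': st.column_config.LinkColumn(
            display_text='https://ui.adsabs.harvard.edu/abs/(.*?)/abstract'
        )
    },
)
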