Spaces:
Sleeping
Sleeping
Léo Bourrel
committed on
Commit
·
3378b23
1
Parent(s):
5a5c81b
feat: share metadata with LLM + Improve doc source display
Browse files
- app.py +11 -4
- custom_pgvector.py +14 -11
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
import streamlit as st
|
| 4 |
import streamlit.components.v1 as components
|
|
@@ -146,8 +147,14 @@ with chat_column:
|
|
| 146 |
|
| 147 |
with doc_column:
|
| 148 |
if len(st.session_state.history) > 0:
|
| 149 |
-
st.markdown("**Source
|
| 150 |
for doc in st.session_state.history[-1].documents:
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
expander.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import json
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
import streamlit.components.v1 as components
|
|
|
|
| 147 |
|
| 148 |
with doc_column:
|
| 149 |
if len(st.session_state.history) > 0:
|
| 150 |
+
st.markdown("**Source documents**")
|
| 151 |
for doc in st.session_state.history[-1].documents:
|
| 152 |
+
doc_content = json.loads(doc.page_content)
|
| 153 |
+
|
| 154 |
+
expander = st.expander(doc_content["title"])
|
| 155 |
+
expander.markdown("**" + doc_content["doi"] + "**")
|
| 156 |
+
expander.markdown(doc_content["abstract"])
|
| 157 |
+
expander.markdown("**Authors** : " + doc_content["authors"])
|
| 158 |
+
expander.markdown("**Keywords** : " + doc_content["keywords"])
|
| 159 |
+
expander.markdown("**Distance** : " + str(doc_content["distance"]))
|
| 160 |
+
|
custom_pgvector.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from __future__ import annotations
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import asyncio
|
| 4 |
import contextlib
|
|
@@ -344,19 +345,20 @@ class CustomPGVector(VectorStore):
|
|
| 344 |
docs = [
|
| 345 |
(
|
| 346 |
Document(
|
| 347 |
-
page_content=
|
| 348 |
-
|
| 349 |
-
"id": result
|
| 350 |
-
"title": result
|
| 351 |
-
"authors": result
|
| 352 |
-
"doi": result
|
| 353 |
-
"
|
| 354 |
-
"
|
| 355 |
-
|
|
|
|
| 356 |
),
|
| 357 |
-
result
|
| 358 |
)
|
| 359 |
-
for result in results
|
| 360 |
]
|
| 361 |
return docs
|
| 362 |
|
|
@@ -392,6 +394,7 @@ class CustomPGVector(VectorStore):
|
|
| 392 |
)
|
| 393 |
results = results.fetchall()
|
| 394 |
results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
|
|
|
|
| 395 |
return results
|
| 396 |
|
| 397 |
def similarity_search_by_vector(
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
import pandas as pd
|
| 4 |
import asyncio
|
| 5 |
import contextlib
|
|
|
|
| 345 |
docs = [
|
| 346 |
(
|
| 347 |
Document(
|
| 348 |
+
page_content=json.dumps({
|
| 349 |
+
"abstract": result["abstract"],
|
| 350 |
+
"id": result["id"],
|
| 351 |
+
"title": result["title"],
|
| 352 |
+
"authors": result["authors"],
|
| 353 |
+
"doi": result["doi"],
|
| 354 |
+
"halID": result["halID"],
|
| 355 |
+
"keywords": result["keywords"],
|
| 356 |
+
"distance": result["distance"],
|
| 357 |
+
}),
|
| 358 |
),
|
| 359 |
+
result["distance"] if self.embedding_function is not None else None,
|
| 360 |
)
|
| 361 |
+
for result in results
|
| 362 |
]
|
| 363 |
return docs
|
| 364 |
|
|
|
|
| 394 |
)
|
| 395 |
results = results.fetchall()
|
| 396 |
results = pd.DataFrame(results, columns=["id", "title", "doi", "abstract", "keywords", "authors", "distance"])
|
| 397 |
+
results = results.to_dict(orient="records")
|
| 398 |
return results
|
| 399 |
|
| 400 |
def similarity_search_by_vector(
|