Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,7 +117,7 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
|
|
| 117 |
st.json(docs[0].metadata)
|
| 118 |
|
| 119 |
# Extract metadata
|
| 120 |
-
|
| 121 |
|
| 122 |
# Display extracted metadata
|
| 123 |
st.subheader("π Extracted Document Metadata")
|
|
@@ -131,8 +131,8 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
|
|
| 131 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 132 |
|
| 133 |
# Convert metadata into a retrievable chunk
|
| 134 |
-
|
| 135 |
-
|
| 136 |
|
| 137 |
# Prevent unnecessary re-chunking
|
| 138 |
if not st.session_state.chunked:
|
|
|
|
| 117 |
st.json(docs[0].metadata)
|
| 118 |
|
| 119 |
# Extract metadata
|
| 120 |
+
metadata = extract_metadata_llm(st.session_state.pdf_path)
|
| 121 |
|
| 122 |
# Display extracted metadata
|
| 123 |
st.subheader("π Extracted Document Metadata")
|
|
|
|
| 131 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 132 |
|
| 133 |
# Convert metadata into a retrievable chunk
|
| 134 |
+
metadata_doc = {"page_content": metadata, "metadata": {"source": "metadata"}}
|
| 135 |
+
|
| 136 |
|
| 137 |
# Prevent unnecessary re-chunking
|
| 138 |
if not st.session_state.chunked:
|