Spaces:
Paused
Paused
Commit
·
0159ca5
1
Parent(s):
2389c43
URL loading testing
Browse files
- app.py +5 -1
- helper_functions.py +3 -7
app.py
CHANGED
|
@@ -21,6 +21,7 @@ def rename(orig_author: str):
|
|
| 21 |
|
| 22 |
@cl.on_message
|
| 23 |
async def main(message: cl.Message):
|
|
|
|
| 24 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
| 25 |
message_type = "url"
|
| 26 |
else:
|
|
@@ -43,7 +44,7 @@ async def main(message: cl.Message):
|
|
| 43 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 44 |
|
| 45 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 46 |
-
|
| 47 |
except Exception as e:
|
| 48 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 49 |
else:
|
|
@@ -85,13 +86,16 @@ async def handle_response(res):
|
|
| 85 |
|
| 86 |
# load the file
|
| 87 |
docs = await asyncio.to_thread(process_file, file)
|
|
|
|
| 88 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
|
|
|
| 89 |
for i, doc in enumerate(splits):
|
| 90 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
| 91 |
print(f"Processing {len(docs)} text chunks")
|
| 92 |
|
| 93 |
# Add to the qdrant_store
|
| 94 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
|
|
|
| 95 |
|
| 96 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
| 97 |
await msg.update()
|
|
|
|
| 21 |
|
| 22 |
@cl.on_message
|
| 23 |
async def main(message: cl.Message):
|
| 24 |
+
await cl.Message(f"Processing `{message.content}`", disable_human_feedback=True)
|
| 25 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
| 26 |
message_type = "url"
|
| 27 |
else:
|
|
|
|
| 44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 45 |
|
| 46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 47 |
+
|
| 48 |
except Exception as e:
|
| 49 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 50 |
else:
|
|
|
|
| 86 |
|
| 87 |
# load the file
|
| 88 |
docs = await asyncio.to_thread(process_file, file)
|
| 89 |
+
await cl.Message(content="loaded docs").send()
|
| 90 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
| 91 |
+
await cl.Message(content="split docs").send()
|
| 92 |
for i, doc in enumerate(splits):
|
| 93 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
| 94 |
print(f"Processing {len(docs)} text chunks")
|
| 95 |
|
| 96 |
# Add to the qdrant_store
|
| 97 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 98 |
+
await cl.Message(content="added to vs").send()
|
| 99 |
|
| 100 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
| 101 |
await msg.update()
|
helper_functions.py
CHANGED
|
@@ -14,17 +14,13 @@ def process_file(file):
|
|
| 14 |
temp_file = "./"+file.path
|
| 15 |
with open(temp_file, "wb") as file:
|
| 16 |
file.write(file.content)
|
| 17 |
-
|
| 18 |
-
documents = []
|
| 19 |
if file.path.endswith(".pdf"):
|
| 20 |
loader = PyMuPDFLoader(temp_file)
|
| 21 |
-
|
| 22 |
-
documents.extend(docs)
|
| 23 |
else:
|
| 24 |
loader = TextLoader(temp_file)
|
| 25 |
-
|
| 26 |
-
documents.extend(docs)
|
| 27 |
-
return documents
|
| 28 |
|
| 29 |
def load_documents_from_url(url):
|
| 30 |
try:
|
|
|
|
| 14 |
temp_file = "./"+file.path
|
| 15 |
with open(temp_file, "wb") as file:
|
| 16 |
file.write(file.content)
|
| 17 |
+
|
|
|
|
| 18 |
if file.path.endswith(".pdf"):
|
| 19 |
loader = PyMuPDFLoader(temp_file)
|
| 20 |
+
return loader.load()
|
|
|
|
| 21 |
else:
|
| 22 |
loader = TextLoader(temp_file)
|
| 23 |
+
return loader.load()
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def load_documents_from_url(url):
|
| 26 |
try:
|