Spaces:
Paused
Paused
Commit
·
0159ca5
1
Parent(s):
2389c43
URL loading testing
Browse files
- app.py +5 -1
- helper_functions.py +3 -7
app.py
CHANGED
|
@@ -21,6 +21,7 @@ def rename(orig_author: str):
|
|
| 21 |
|
| 22 |
@cl.on_message
|
| 23 |
async def main(message: cl.Message):
|
|
|
|
| 24 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
| 25 |
message_type = "url"
|
| 26 |
else:
|
|
@@ -43,7 +44,7 @@ async def main(message: cl.Message):
|
|
| 43 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 44 |
|
| 45 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 46 |
-
|
| 47 |
except Exception as e:
|
| 48 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 49 |
else:
|
|
@@ -85,13 +86,16 @@ async def handle_response(res):
|
|
| 85 |
|
| 86 |
# load the file
|
| 87 |
docs = await asyncio.to_thread(process_file, file)
|
|
|
|
| 88 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
|
|
|
| 89 |
for i, doc in enumerate(splits):
|
| 90 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
| 91 |
print(f"Processing {len(docs)} text chunks")
|
| 92 |
|
| 93 |
# Add to the qdrant_store
|
| 94 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
|
|
|
| 95 |
|
| 96 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
| 97 |
await msg.update()
|
|
|
|
| 21 |
|
| 22 |
@cl.on_message
|
| 23 |
async def main(message: cl.Message):
|
| 24 |
+
await cl.Message(f"Processing `{message.content}`", disable_human_feedback=True)
|
| 25 |
if message.content.startswith("http://") or message.content.startswith("https://"):
|
| 26 |
message_type = "url"
|
| 27 |
else:
|
|
|
|
| 44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 45 |
|
| 46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 47 |
+
|
| 48 |
except Exception as e:
|
| 49 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 50 |
else:
|
|
|
|
| 86 |
|
| 87 |
# load the file
|
| 88 |
docs = await asyncio.to_thread(process_file, file)
|
| 89 |
+
await cl.Message(content="loaded docs").send()
|
| 90 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
| 91 |
+
await cl.Message(content="split docs").send()
|
| 92 |
for i, doc in enumerate(splits):
|
| 93 |
doc.metadata["user_upload_source"] = f"source_{i}"
|
| 94 |
print(f"Processing {len(docs)} text chunks")
|
| 95 |
|
| 96 |
# Add to the qdrant_store
|
| 97 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 98 |
+
await cl.Message(content="added to vs").send()
|
| 99 |
|
| 100 |
msg.content = f"Processing `{file.name}` done. You can now ask questions!"
|
| 101 |
await msg.update()
|
helper_functions.py
CHANGED
|
@@ -14,17 +14,13 @@ def process_file(file):
|
|
| 14 |
temp_file = "./"+file.path
|
| 15 |
with open(temp_file, "wb") as file:
|
| 16 |
file.write(file.content)
|
| 17 |
-
|
| 18 |
-
documents = []
|
| 19 |
if file.path.endswith(".pdf"):
|
| 20 |
loader = PyMuPDFLoader(temp_file)
|
| 21 |
-
|
| 22 |
-
documents.extend(docs)
|
| 23 |
else:
|
| 24 |
loader = TextLoader(temp_file)
|
| 25 |
-
|
| 26 |
-
documents.extend(docs)
|
| 27 |
-
return documents
|
| 28 |
|
| 29 |
def load_documents_from_url(url):
|
| 30 |
try:
|
|
|
|
| 14 |
temp_file = "./"+file.path
|
| 15 |
with open(temp_file, "wb") as file:
|
| 16 |
file.write(file.content)
|
| 17 |
+
|
|
|
|
| 18 |
if file.path.endswith(".pdf"):
|
| 19 |
loader = PyMuPDFLoader(temp_file)
|
| 20 |
+
return loader.load()
|
|
|
|
| 21 |
else:
|
| 22 |
loader = TextLoader(temp_file)
|
| 23 |
+
return loader.load()
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def load_documents_from_url(url):
|
| 26 |
try:
|