Spaces:

CoExperiences
/

aie4-final

Paused

angry-meow committed on Oct 16, 2024

Commit

be3813f

2 Parent(s): d523035 9a71317

Merge branch 'main' of https://huggingface.co/spaces/CoExperiences/aie4-final into main

Files changed (2) hide show

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ async def on_chat_start():
         files = await cl.AskFileMessage(
             content="Please upload a Text or PDF File file to begin!",
             accept=["text/plain", "application/pdf"],
-            max_size_mb=2,
         ).send()
         file = files[0]
@@ -63,14 +63,12 @@ async def on_chat_start():
         # load the file
         docs = process_file(file)
-        for i, doc in enumerate(docs):
-            doc.metadata["source"] = f"source_{i}" # TO DO: Add metadata
-            add_to_qdrant(doc, te3_small, qdrant_client, collection_name)
         print(f"Processing {len(docs)} text chunks")
         # Add to the qdrant_store
-        splits = text_splitter.split_documents(docs)
         qdrant_store.add_documents(
             documents=splits
         )

         files = await cl.AskFileMessage(
             content="Please upload a Text or PDF File file to begin!",
             accept=["text/plain", "application/pdf"],
+            max_size_mb=12,
         ).send()
         file = files[0]
         # load the file
         docs = process_file(file)
+        splits = text_splitter.split_documents(docs)
+        for i, doc in enumerate(splits):
+            doc.metadata["source"] = f"source_{i}"
         print(f"Processing {len(docs)} text chunks")
         # Add to the qdrant_store
         qdrant_store.add_documents(
             documents=splits
         )

helper_functions.py CHANGED Viewed

@@ -1,14 +1,21 @@
 from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
 from langchain_community.vectorstores import Qdrant
-def process_file(file):
     documents = []
-    if file.endswith(".pdf"):
-        loader = PyMuPDFLoader(file)
         docs = loader.load()
         documents.extend(docs)
     else:
-        loader = TextLoader(file)
         docs = loader.load()
         documents.extend(docs)
     return documents

 from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
 from langchain_community.vectorstores import Qdrant
+import os
+def process_file(uploaded_file):
+    # save the file temporarily
+    temp_file = "./temp.pdf"
+    with open(temp_file, "wb") as file:
+       file.write(uploaded_file.content)
+       file_name = uploaded_file.name
     documents = []
+    if uploaded_file.path.endswith(".pdf"):
+        loader = PyMuPDFLoader(temp_file)
         docs = loader.load()
         documents.extend(docs)
     else:
+        loader = TextLoader(tmp_location)
         docs = loader.load()
         documents.extend(docs)
     return documents