Update app.py
app.py CHANGED
@@ -41,6 +41,14 @@ if process_url_clicked:
     main_placeholder.text("Text Splitter...Started...✅✅✅")
     docs = text_splitter.split_documents(data)
 
+    # Debugging: Check if docs is empty
+    if not docs:
+        main_placeholder.text("No valid documents found! Please check the URLs.")
+
+    # Debugging: Check the content of docs
+    for doc in docs:
+        main_placeholder.text(f"Document content: {doc.page_content[:200]}")  # Show first 200 chars of each document
+
     # Create embeddings using HuggingFaceEmbeddings
     embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
@@ -48,22 +56,35 @@
     # Generate embeddings
     embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
 
+    # Debugging: Check if embeddings are generated
+    if not embeddings:
+        main_placeholder.text("No embeddings were generated! Check the embedding model or document content.")
+
+    # Check the size of embeddings
+    main_placeholder.text(f"Generated {len(embeddings)} embeddings.")
+
     # Convert embeddings to numpy array (needed by FAISS)
     embeddings_np = np.array(embeddings).astype(np.float32)
 
-    #
-
-    index = FAISS(dimension)
-    index.add(embeddings_np)  # Add embeddings to FAISS index
+    # Check the shape of the embeddings
+    main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
 
-    #
-
-
-
-
-
-
-
+    # Create FAISS index
+    if len(embeddings) > 0:
+        dimension = len(embeddings[0])  # Embedding vector dimension
+        index = FAISS(dimension)
+        index.add(embeddings_np)  # Add embeddings to FAISS index
+
+        # Wrap FAISS index using LangChain FAISS wrapper
+        vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
+
+        # Save the FAISS index to a pickle file
+        with open(file_path, "wb") as f:
+            pickle.dump(vectorstore_huggingface, f)
+
+        time.sleep(2)
+    else:
+        main_placeholder.text("Embeddings could not be generated, skipping FAISS index creation.")
 
 query = main_placeholder.text_input("Question: ")
 if query:
@@ -89,3 +110,4 @@ if query:
 
 
 
+
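A caveat on the debugging pattern this commit adds: main_placeholder appears to be a single st.empty() slot (its creation is outside these hunks), so every .text() call overwrites the previous message and the per-document loop only ever shows the last chunk; the empty-docs branch also falls through and still runs the embedding step. A minimal sketch of checks that persist and actually halt, assuming streamlit is imported as st:

import streamlit as st

if not docs:
    st.error("No valid documents found! Please check the URLs.")
    st.stop()  # halt here instead of falling through to the embedding step

for doc in docs:
    st.write(doc.page_content[:200])  # st.write renders each message instead of reusing one slot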
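On the index construction: FAISS(dimension) is not a constructor in either the faiss package or LangChain's FAISS wrapper, so that branch would raise at runtime. With the raw faiss API the index type must be chosen explicitly; a sketch using the exact L2 index over the embeddings_np array built above (all-MiniLM-L6-v2 produces 384-dimensional vectors):

import faiss

dimension = embeddings_np.shape[1]    # 384 for all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 search
index.add(embeddings_np)              # expects float32, shape (n_docs, dimension)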
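Likewise, LangChain's FAISS wrapper requires a docstore and an index-to-docstore mapping in addition to embedding_function and index, and pickling the wrapper directly tends to fail because the underlying SWIG index object is not picklable. The usual route is from_documents plus save_local; a sketch assuming the langchain-community package and a hypothetical folder name faiss_store:

from langchain_community.vectorstores import FAISS

# Builds the raw faiss index, the docstore, and the id mapping in one call
vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
vectorstore_huggingface.save_local("faiss_store")

# Reload later; recent versions require the opt-in flag because the docstore is unpickled
vectorstore_huggingface = FAISS.load_local(
    "faiss_store", embedding_model, allow_dangerous_deserialization=True
)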