Update app.py
app.py CHANGED
@@ -41,6 +41,14 @@ if process_url_clicked:
     main_placeholder.text("Text Splitter...Started...✅✅✅")
     docs = text_splitter.split_documents(data)
 
+    # Debugging: Check if docs is empty
+    if not docs:
+        main_placeholder.text("No valid documents found! Please check the URLs.")
+
+    # Debugging: Check the content of docs
+    for doc in docs:
+        main_placeholder.text(f"Document content: {doc.page_content[:200]}")  # Show first 200 chars of each document
+
     # Create embeddings using HuggingFaceEmbeddings
     embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
@@ -48,22 +56,35 @@
     # Generate embeddings
     embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
 
+    # Debugging: Check if embeddings are generated
+    if not embeddings:
+        main_placeholder.text("No embeddings were generated! Check the embedding model or document content.")
+
+    # Check the size of embeddings
+    main_placeholder.text(f"Generated {len(embeddings)} embeddings.")
+
     # Convert embeddings to numpy array (needed by FAISS)
     embeddings_np = np.array(embeddings).astype(np.float32)
 
-    #
-
-    index = FAISS(dimension)
-    index.add(embeddings_np)  # Add embeddings to FAISS index
+    # Check the shape of the embeddings
+    main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
 
-    #
-
-
-
-
-
-
-
+    # Create FAISS index
+    if len(embeddings) > 0:
+        dimension = len(embeddings[0])  # Embedding vector dimension
+        index = FAISS(dimension)
+        index.add(embeddings_np)  # Add embeddings to FAISS index
+
+        # Wrap FAISS index using LangChain FAISS wrapper
+        vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
+
+        # Save the FAISS index to a pickle file
+        with open(file_path, "wb") as f:
+            pickle.dump(vectorstore_huggingface, f)
+
+        time.sleep(2)
+    else:
+        main_placeholder.text("Embeddings could not be generated, skipping FAISS index creation.")
 
 query = main_placeholder.text_input("Question: ")
 if query:
@@ -89,3 +110,4 @@ if query:
 
 
 
+
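A caveat on the debugging pattern this commit adds: main_placeholder appears to be a single st.empty() slot (its creation is outside these hunks), so every .text() call overwrites the previous message and the per-document loop only ever shows the last chunk; the empty-docs branch also falls through and still runs the embedding step. A minimal sketch of checks that persist and actually halt, assuming streamlit is imported as st:

import streamlit as st

if not docs:
    st.error("No valid documents found! Please check the URLs.")
    st.stop()  # halt here instead of falling through to the embedding step

for doc in docs:
    st.write(doc.page_content[:200])  # st.write renders each message instead of reusing one slot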
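On the index construction: FAISS(dimension) is not a constructor in either the faiss package or LangChain's FAISS wrapper, so that branch would raise at runtime. With the raw faiss API the index type must be chosen explicitly; a sketch using the exact L2 index over the embeddings_np array built above (all-MiniLM-L6-v2 produces 384-dimensional vectors):

import faiss

dimension = embeddings_np.shape[1]    # 384 for all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 search
index.add(embeddings_np)              # expects float32, shape (n_docs, dimension)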
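Likewise, LangChain's FAISS wrapper requires a docstore and an index-to-docstore mapping in addition to embedding_function and index, and pickling the wrapper directly tends to fail because the underlying SWIG index object is not picklable. The usual route is from_documents plus save_local; a sketch assuming the langchain-community package and a hypothetical folder name faiss_store:

from langchain_community.vectorstores import FAISS

# Builds the raw faiss index, the docstore, and the id mapping in one call
vectorstore_huggingface = FAISS.from_documents(docs, embedding_model)
vectorstore_huggingface.save_local("faiss_store")

# Reload later; recent versions require the opt-in flag because the docstore is unpickled
vectorstore_huggingface = FAISS.load_local(
    "faiss_store", embedding_model, allow_dangerous_deserialization=True
)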