Update app.py
app.py CHANGED
@@ -10,9 +10,9 @@ import nltk
 nltk.download('punkt')
 nltk.download('punkt_tab')
 
-#
-manual_path = "ubuntu_manual.txt"
+# Paths
 faiss_path = "manual_chunked_faiss_index_500.bin"
+manual_path = "ubuntu_manual.txt"
 
 # Load the Ubuntu manual from a .txt file
 try:
@@ -22,7 +22,7 @@ except FileNotFoundError:
     raise FileNotFoundError(f"The file {manual_path} was not found.")
 
 # Function to chunk the text into smaller pieces
-def chunk_text(text, chunk_size=500):
+def chunk_text(text, chunk_size=500):
     sentences = sent_tokenize(text)
     chunks = []
     current_chunk = []
@@ -46,31 +46,31 @@ manual_chunks = chunk_text(full_text, chunk_size=500)
 try:
     index = faiss.read_index(faiss_path)
 except Exception as e:
-    raise RuntimeError(f"Failed to load FAISS index
+    raise RuntimeError(f"Failed to load FAISS index: {e}")
 
 # Load your embedding model
-embedding_model = SentenceTransformer('
+embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
 # OpenAI API key
-openai.api_key = 'sk-proj-
+openai.api_key = 'sk-proj-[REDACTED]'
 
 # Function to create embeddings
 def embed_text(text_list):
-
+    embeddings = embedding_model.encode(text_list)
+    print("Embedding shape:", embeddings.shape)  # Debugging: Print shape
+    return np.array(embeddings, dtype=np.float32)
 
 # Function to retrieve relevant chunks for a user query
 def retrieve_chunks(query, k=5):
     query_embedding = embed_text([query])
 
-    # Search the FAISS index
     try:
         distances, indices = index.search(query_embedding, k=k)
-        print("Indices:", indices)
-        print("Distances:", distances)
+        print("Indices:", indices)  # Debugging: Print indices
+        print("Distances:", distances)  # Debugging: Print distances
     except Exception as e:
         raise RuntimeError(f"FAISS search failed: {e}")
-
-    # Check if indices are valid
+
     if len(indices[0]) == 0:
         return []
 
@@ -129,4 +129,3 @@ if __name__ == "__main__":
 
 
 
-
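One thing worth flagging in this commit: the OpenAI API key is hardcoded in app.py (redacted above), so anyone who can view the repository can read it. A minimal sketch of the usual alternative, assuming a standard OPENAI_API_KEY variable name; on a Hugging Face Space this would be configured as a repository secret rather than committed:

import os

import openai

# Assumption: an OPENAI_API_KEY secret / environment variable is configured
# for the Space, so no credential ever lands in version control.
openai.api_key = os.environ["OPENAI_API_KEY"]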
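The diff only shows the first three lines of chunk_text; the rest of its body falls outside the hunks. For readers following along, here is a hedged sketch of how a sentence-based chunker with this signature typically completes, an assumption about the shape of the code, not the commit's actual body:

from nltk.tokenize import sent_tokenize

def chunk_text(text, chunk_size=500):
    # Greedily pack whole sentences into chunks of at most ~chunk_size characters.
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_len = 0
    for sentence in sentences:
        if current_len + len(sentence) > chunk_size and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk, current_len = [], 0
        current_chunk.append(sentence)
        current_len += len(sentence)
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

Chunking on sentence boundaries rather than at fixed character offsets keeps each indexed chunk semantically coherent, which is presumably why the app tokenizes with NLTK first.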
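Finally, for context on what the changed embed_text and retrieve_chunks lines do together, a self-contained sketch of the embed-then-search flow. The index here is built in memory with IndexFlatL2, whereas app.py reads a prebuilt index from manual_chunked_faiss_index_500.bin, so the L2 metric and dimensionality are assumptions:

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
chunks = [
    "Ubuntu uses APT for package management.",
    "GRUB is the default boot loader on Ubuntu.",
]

# FAISS only accepts float32 arrays, hence the cast the commit also adds in embed_text.
embeddings = np.array(model.encode(chunks), dtype=np.float32)
index = faiss.IndexFlatL2(embeddings.shape[1])  # 384 dimensions for all-MiniLM-L6-v2
index.add(embeddings)

query = np.array(model.encode(["How do I install a package?"]), dtype=np.float32)
distances, indices = index.search(query, 2)
print("Nearest chunk:", chunks[indices[0][0]])

The float32 cast matters: the FAISS Python bindings reject float64 input, which is the likely reason the commit wraps embedding_model.encode() in np.array(..., dtype=np.float32).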