Update app.py
app.py
CHANGED
@@ -10,13 +10,16 @@ import nltk
 nltk.download('punkt')
 nltk.download('punkt_tab')
 
-
+# Define paths as variables
+manual_path = "ubuntu_manual.txt"
+faiss_path = "manual_chunked_faiss_index_500.bin"
+
 # Load the Ubuntu manual from a .txt file
 try:
-    with open(
+    with open(manual_path, "r", encoding="utf-8") as file:
         full_text = file.read()
 except FileNotFoundError:
-    raise FileNotFoundError("The file
+    raise FileNotFoundError(f"The file {manual_path} was not found.")
 
 # Function to chunk the text into smaller pieces
 def chunk_text(text, chunk_size=500): # Larger chunks
@@ -41,10 +44,9 @@ manual_chunks = chunk_text(full_text, chunk_size=500)
 
 # Load your FAISS index
 try:
-
-    index = faiss.read_index("/absolute/path/to/manual_chunked_faiss_index_500.bin")
+    index = faiss.read_index(faiss_path)
 except Exception as e:
-    raise RuntimeError(f"Failed to load FAISS index: {e}")
+    raise RuntimeError(f"Failed to load FAISS index from {faiss_path}: {e}")
 
 # Load your embedding model
 embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
@@ -52,7 +54,6 @@ embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
 # OpenAI API key
 openai.api_key = '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'
 
-
 # Function to create embeddings
 def embed_text(text_list):
     return np.array(embedding_model.encode(text_list), dtype=np.float32)
@@ -64,6 +65,8 @@ def retrieve_chunks(query, k=5):
 # Search the FAISS index
 try:
     distances, indices = index.search(query_embedding, k=k)
+    print("Indices:", indices)
+    print("Distances:", distances)
 except Exception as e:
     raise RuntimeError(f"FAISS search failed: {e}")
 
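The commit routes the two hard-coded file locations through manual_path and faiss_path and adds debug prints around the FAISS search. As a way to sanity-check the retrieval path end to end, here is a minimal sketch of how the pieces visible in this diff fit together. The names manual_path, faiss_path, chunk_text, embed_text, retrieve_chunks, and the embedding model ID come from app.py itself; the body of chunk_text and the final chunk lookup are assumptions, since the diff only shows the surrounding signatures.

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Paths as defined in this commit
manual_path = "ubuntu_manual.txt"
faiss_path = "manual_chunked_faiss_index_500.bin"

# Load the manual and the prebuilt index (both must exist on disk)
with open(manual_path, "r", encoding="utf-8") as file:
    full_text = file.read()
index = faiss.read_index(faiss_path)

embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')

def chunk_text(text, chunk_size=500):
    # Assumed implementation: the diff shows only the signature.
    # Splits on whitespace into chunks of roughly chunk_size words.
    words = text.split()
    return [" ".join(words[i:i + chunk_size])
            for i in range(0, len(words), chunk_size)]

manual_chunks = chunk_text(full_text, chunk_size=500)

def embed_text(text_list):
    # FAISS expects a float32 matrix of shape (n_queries, dim)
    return np.array(embedding_model.encode(text_list), dtype=np.float32)

def retrieve_chunks(query, k=5):
    query_embedding = embed_text([query])
    distances, indices = index.search(query_embedding, k=k)
    print("Indices:", indices)      # debug output added in this commit
    print("Distances:", distances)
    # FAISS pads the result with -1 when fewer than k vectors match
    return [manual_chunks[i] for i in indices[0] if i != -1]

print(retrieve_chunks("How do I update packages?", k=5))

Note that this only works if the index in manual_chunked_faiss_index_500.bin was built from the same chunking of the same manual, since index.search returns positions into that chunk list; the prints added in the last hunk make it easy to spot a mismatch (out-of-range or -1 indices) at runtime.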