Changed similarity threshold and max chunk size for semantic chunking
Browse files
app.py
CHANGED
|
@@ -64,7 +64,7 @@ async def document_index(request: DocumentIndexRequest, x_api_key: str = Header(
|
|
| 64 |
#print('text_content',text_content)
|
| 65 |
|
| 66 |
# Chunking text using semantic chunking
|
| 67 |
-
chunks = cumulative_semantic_chunking(text_content, max_chunk_size=
|
| 68 |
logging.info(f"Text content chunked into {len(chunks)} chunks.")
|
| 69 |
# Embed chunks
|
| 70 |
embeddings, total_tokens = embed_chunks(chunks)
|
|
|
|
| 64 |
#print('text_content',text_content)
|
| 65 |
|
| 66 |
# Chunking text using semantic chunking
|
| 67 |
+
chunks = cumulative_semantic_chunking(text_content, max_chunk_size=2048, similarity_threshold=0.6)
|
| 68 |
logging.info(f"Text content chunked into {len(chunks)} chunks.")
|
| 69 |
# Embed chunks
|
| 70 |
embeddings, total_tokens = embed_chunks(chunks)
|