Spaces:
Paused
Paused
Commit
·
b2f993e
1
Parent(s):
05201cc
commenting out unused stuff
Browse files
agents.py
CHANGED
|
@@ -55,7 +55,7 @@ voice_editor_agent = create_team_agent(
|
|
| 55 |
|
| 56 |
simple_rag_chain = (
|
| 57 |
{
|
| 58 |
-
"context": itemgetter("question") | models.
|
| 59 |
"question": itemgetter("question"),
|
| 60 |
"writing_style_guide": lambda _: prompts.style_guide_text
|
| 61 |
}
|
|
|
|
| 55 |
|
| 56 |
simple_rag_chain = (
|
| 57 |
{
|
| 58 |
+
"context": itemgetter("question") | models.semantic_tuned_retriever,
|
| 59 |
"question": itemgetter("question"),
|
| 60 |
"writing_style_guide": lambda _: prompts.style_guide_text
|
| 61 |
}
|
models.py
CHANGED
|
@@ -28,20 +28,20 @@ callback_manager = CallbackManager([tracer])
|
|
| 28 |
### Chat Models ###
|
| 29 |
########################
|
| 30 |
|
| 31 |
-
opus3 = ChatAnthropic(
|
| 32 |
-
api_key=constants.ANTRHOPIC_API_KEY,
|
| 33 |
-
temperature=0,
|
| 34 |
-
model='claude-3-opus-20240229',
|
| 35 |
-
callbacks=callback_manager
|
| 36 |
-
)
|
| 37 |
-
|
| 38 |
-
sonnet35 = ChatAnthropic(
|
| 39 |
-
api_key=constants.ANTRHOPIC_API_KEY,
|
| 40 |
-
temperature=0,
|
| 41 |
-
model='claude-3-5-sonnet-20240620',
|
| 42 |
-
max_tokens=4096,
|
| 43 |
-
callbacks=callback_manager
|
| 44 |
-
)
|
| 45 |
|
| 46 |
gpt4 = ChatOpenAI(
|
| 47 |
model="gpt-4",
|
|
@@ -77,20 +77,20 @@ gpt4o_mini = ChatOpenAI(
|
|
| 77 |
### Embedding Models ###
|
| 78 |
########################
|
| 79 |
|
| 80 |
-
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
| 81 |
|
| 82 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
| 83 |
|
| 84 |
-
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
| 85 |
|
| 86 |
#######################
|
| 87 |
### Text Splitters ###
|
| 88 |
#######################
|
| 89 |
|
| 90 |
-
semanticChunker = SemanticChunker(
|
| 91 |
-
te3_small,
|
| 92 |
-
breakpoint_threshold_type="percentile"
|
| 93 |
-
)
|
| 94 |
|
| 95 |
semanticChunker_tuned = SemanticChunker(
|
| 96 |
tuned_embeddings,
|
|
@@ -98,12 +98,12 @@ semanticChunker_tuned = SemanticChunker(
|
|
| 98 |
breakpoint_threshold_amount=85
|
| 99 |
)
|
| 100 |
|
| 101 |
-
RCTS = RecursiveCharacterTextSplitter(
|
| 102 |
-
# Set a really small chunk size, just to show.
|
| 103 |
-
chunk_size=500,
|
| 104 |
-
chunk_overlap=25,
|
| 105 |
-
length_function=len,
|
| 106 |
-
)
|
| 107 |
|
| 108 |
#######################
|
| 109 |
### Vector Stores ###
|
|
@@ -111,17 +111,17 @@ RCTS = RecursiveCharacterTextSplitter(
|
|
| 111 |
|
| 112 |
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
| 113 |
|
| 114 |
-
semantic_Qdrant_vs = QdrantVectorStore(
|
| 115 |
-
client=qdrant_client,
|
| 116 |
-
collection_name="docs_from_ripped_urls",
|
| 117 |
-
embedding=te3_small
|
| 118 |
-
)
|
| 119 |
-
|
| 120 |
-
rcts_Qdrant_vs = QdrantVectorStore(
|
| 121 |
-
client=qdrant_client,
|
| 122 |
-
collection_name="docs_from_ripped_urls_recursive",
|
| 123 |
-
embedding=te3_small
|
| 124 |
-
)
|
| 125 |
|
| 126 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
| 127 |
client=qdrant_client,
|
|
|
|
| 28 |
### Chat Models ###
|
| 29 |
########################
|
| 30 |
|
| 31 |
+
#opus3 = ChatAnthropic(
|
| 32 |
+
# api_key=constants.ANTRHOPIC_API_KEY,
|
| 33 |
+
# temperature=0,
|
| 34 |
+
# model='claude-3-opus-20240229',
|
| 35 |
+
# callbacks=callback_manager
|
| 36 |
+
#)
|
| 37 |
+
#
|
| 38 |
+
#sonnet35 = ChatAnthropic(
|
| 39 |
+
# api_key=constants.ANTRHOPIC_API_KEY,
|
| 40 |
+
# temperature=0,
|
| 41 |
+
# model='claude-3-5-sonnet-20240620',
|
| 42 |
+
# max_tokens=4096,
|
| 43 |
+
# callbacks=callback_manager
|
| 44 |
+
#)
|
| 45 |
|
| 46 |
gpt4 = ChatOpenAI(
|
| 47 |
model="gpt-4",
|
|
|
|
| 77 |
### Embedding Models ###
|
| 78 |
########################
|
| 79 |
|
| 80 |
+
#basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
| 81 |
|
| 82 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
| 83 |
|
| 84 |
+
#te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
| 85 |
|
| 86 |
#######################
|
| 87 |
### Text Splitters ###
|
| 88 |
#######################
|
| 89 |
|
| 90 |
+
#semanticChunker = SemanticChunker(
|
| 91 |
+
# te3_small,
|
| 92 |
+
# breakpoint_threshold_type="percentile"
|
| 93 |
+
#)
|
| 94 |
|
| 95 |
semanticChunker_tuned = SemanticChunker(
|
| 96 |
tuned_embeddings,
|
|
|
|
| 98 |
breakpoint_threshold_amount=85
|
| 99 |
)
|
| 100 |
|
| 101 |
+
#RCTS = RecursiveCharacterTextSplitter(
|
| 102 |
+
# # Set a really small chunk size, just to show.
|
| 103 |
+
# chunk_size=500,
|
| 104 |
+
# chunk_overlap=25,
|
| 105 |
+
# length_function=len,
|
| 106 |
+
#)
|
| 107 |
|
| 108 |
#######################
|
| 109 |
### Vector Stores ###
|
|
|
|
| 111 |
|
| 112 |
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
| 113 |
|
| 114 |
+
#semantic_Qdrant_vs = QdrantVectorStore(
|
| 115 |
+
# client=qdrant_client,
|
| 116 |
+
# collection_name="docs_from_ripped_urls",
|
| 117 |
+
# embedding=te3_small
|
| 118 |
+
#)
|
| 119 |
+
#
|
| 120 |
+
#rcts_Qdrant_vs = QdrantVectorStore(
|
| 121 |
+
# client=qdrant_client,
|
| 122 |
+
# collection_name="docs_from_ripped_urls_recursive",
|
| 123 |
+
# embedding=te3_small
|
| 124 |
+
#)
|
| 125 |
|
| 126 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
| 127 |
client=qdrant_client,
|
tools.py
CHANGED
|
@@ -1,20 +1,34 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
-
from typing import Annotated, Optional
|
| 3 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 4 |
from langchain_core.tools import tool
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
WORKING_DIRECTORY = Path("/tmp/content/data")
|
| 8 |
WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
|
| 9 |
|
| 10 |
tavily_tool = TavilySearchResults(max_results=5)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@tool
|
| 13 |
def retrieve_information(
|
| 14 |
query: Annotated[str, "query to ask the retrieve information tool"]
|
| 15 |
):
|
| 16 |
"""Use Retrieval Augmented Generation to retrieve information about the 'Extending Llama-3’s Context Ten-Fold Overnight' paper."""
|
| 17 |
-
return
|
| 18 |
|
| 19 |
@tool
|
| 20 |
def create_outline(points: List[str], file_name: str) -> str:
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
+
from typing import Annotated, Dict, List, Optional
|
| 3 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 4 |
from langchain_core.tools import tool
|
| 5 |
+
import prompts
|
| 6 |
+
import models
|
| 7 |
+
from operator import itemgetter
|
| 8 |
+
from langchain_core.runnables.passthrough import RunnablePassthrough
|
| 9 |
|
| 10 |
WORKING_DIRECTORY = Path("/tmp/content/data")
|
| 11 |
WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
|
| 12 |
|
| 13 |
tavily_tool = TavilySearchResults(max_results=5)
|
| 14 |
|
| 15 |
+
tool_chain = (
|
| 16 |
+
{
|
| 17 |
+
"context": itemgetter("question") | models.semantic_tuned_retriever,
|
| 18 |
+
"question": itemgetter("question"),
|
| 19 |
+
"writing_style_guide": lambda _: prompts.style_guide_text
|
| 20 |
+
}
|
| 21 |
+
| RunnablePassthrough.assign(context=itemgetter("context"))
|
| 22 |
+
| prompts.chat_prompt
|
| 23 |
+
| models.gpt4o
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
@tool
|
| 27 |
def retrieve_information(
|
| 28 |
query: Annotated[str, "query to ask the retrieve information tool"]
|
| 29 |
):
|
| 30 |
"""Use Retrieval Augmented Generation to retrieve information about the 'Extending Llama-3’s Context Ten-Fold Overnight' paper."""
|
| 31 |
+
return tool_chain.invoke({"question" : query})
|
| 32 |
|
| 33 |
@tool
|
| 34 |
def create_outline(points: List[str], file_name: str) -> str:
|