v3 (#2)
Files changed:
- ask_candid/base/config/constants.py +13 -0
- ask_candid/base/config/models.py +12 -0
- ask_candid/base/retrieval/sources.py +5 -2
- ask_candid/chat.py +3 -3
- ask_candid/services/knowledge_base.py +13 -9
- ask_candid/tools/general.py +10 -2
- ask_candid/tools/grants.py +48 -15
- ask_candid/tools/org_search.py +13 -4
- ask_candid/tools/recommendations.py +57 -22
- ask_candid/utils.py +0 -11
- chat_v2.py +6 -6
ask_candid/base/config/constants.py
CHANGED

@@ -2,3 +2,16 @@ START_SYSTEM_PROMPT = (
     "You are a Candid subject matter expert on the social sector and philanthropy. "
     "You should address the user's queries and stay on topic."
 )
+
+TONE_PROMPT = (
+    "You must be cordial with the user. You should be helpful, but NEVER be sycophantic. "
+    "NEVER use extreme rhetoric such as 'perfect', 'excellent' or 'amazing'. "
+    "Be realistic, NEVER convey confidence when responding in a context with a high degree of uncertainty. "
+)
+
+FEEDBACK_PROMPT = (
+    "NEVER assume that your responses are sufficiently answering the user's inquiry. "
+    "Ask for clarification from the user if the intent is not clear. "
+    "Ask the user if responses are helpful/useful, "
+    "and if any further context is needed to hone in on a better response."
+)
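The two new constants are plain string fragments alongside START_SYSTEM_PROMPT. The commit does not show where they are consumed, so the following is only a hypothetical sketch of composing them into a single system prompt:

from ask_candid.base.config.constants import START_SYSTEM_PROMPT, TONE_PROMPT, FEEDBACK_PROMPT

# Hypothetical assembly point (not part of this commit): concatenate the
# prompt fragments into one system prompt string.
system_prompt = "\n\n".join([START_SYSTEM_PROMPT, TONE_PROMPT, FEEDBACK_PROMPT])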
ask_candid/base/config/models.py
CHANGED

@@ -1,9 +1,21 @@
 from types import MappingProxyType
+from enum import Enum
+
+
+class BedrockEndpoints(Enum):
+    claude_35_haiku = "us.anthropic.claude-3-5-haiku-20241022-v1:0"
+    claude_4_sonnet = "us.anthropic.claude-sonnet-4-20250514-v1:0"
+    claude_45_sonnet = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
+    llama_31_70b_instruct = "us.meta.llama3-1-70b-instruct-v1:0"
+    mistral_large = "mistral.mistral-large-2402-v1:0"
+    mixtral_8x7b = "mistral.mixtral-8x7b-instruct-v0:1"
+
 
 Name2Endpoint = MappingProxyType({
     "gpt-4o": "gpt-4o",
     "claude-3.5-haiku": "us.anthropic.claude-3-5-haiku-20241022-v1:0",
     "claude-4-sonnet": "us.anthropic.claude-sonnet-4-20250514-v1:0",
+    "claude-4.5-sonnet": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
     # "llama-3.1-70b-instruct": "us.meta.llama3-1-70b-instruct-v1:0",
     # "mistral-large": "mistral.mistral-large-2402-v1:0",
     # "mixtral-8x7B": "mistral.mixtral-8x7b-instruct-v0:1",
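A quick sanity check of how the new enum relates to the existing mapping; both now carry the same Bedrock model IDs, so either lookup style resolves to the same endpoint string:

from ask_candid.base.config.models import BedrockEndpoints, Name2Endpoint

# Enum members mirror the endpoint strings kept in Name2Endpoint.
assert BedrockEndpoints.claude_4_sonnet.value == Name2Endpoint["claude-4-sonnet"]
assert BedrockEndpoints.claude_45_sonnet.value == Name2Endpoint["claude-4.5-sonnet"]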
ask_candid/base/retrieval/sources.py
CHANGED

@@ -2,8 +2,11 @@ from ask_candid.base.retrieval.schemas import ElasticSourceConfig


 CandidBlogConfig = ElasticSourceConfig(
-    index_name="search-semantic-
-    semantic_fields=("
+    index_name="search-semantic-blog",
+    semantic_fields=("semantic_title_summary_tags_text", "semantic_authors_text","semantic_content"),
+    text_fields=("title", "summary", "content", "authors_text"),
+    highlight_fields=("semantic_content",),
+    excluded_fields=("content",)
 )

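The blog config is read downstream when building queries (see the generate_queries change in ask_candid/services/knowledge_base.py below); a minimal look at the fields this commit fills in:

from ask_candid.base.retrieval.sources import CandidBlogConfig

# Attribute access mirrors how generate_queries consumes the config.
print(CandidBlogConfig.index_name)        # search-semantic-blog
print(CandidBlogConfig.semantic_fields)   # semantic title/summary/tags, authors, content
print(CandidBlogConfig.excluded_fields)   # ("content",)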
ask_candid/chat.py
CHANGED

@@ -1,5 +1,5 @@
 from typing import TypedDict, Literal, Any
-from collections.abc import Iterator
+from collections.abc import Iterator, Sequence
 from dataclasses import asdict
 import logging
 import json
@@ -34,11 +34,11 @@ class ToolResult(TypedDict):
     interrupts: list


-def convert_history_for_graph_agent(history:
+def convert_history_for_graph_agent(history: Sequence[dict | ChatMessage]) -> list[dict]:
     _hist = []
     for h in history:
         if isinstance(h, ChatMessage):
-            h = asdict(h)
+            h = asdict(h)  # noqa: PLW2901

         if h.get("content"):
             # if h.get("metadata"):
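The widened signature accepts Gradio ChatMessage objects and plain role/content dicts interchangeably; a hypothetical call (message contents invented for illustration):

import gradio as gr
from ask_candid.chat import convert_history_for_graph_agent

# ChatMessage entries are converted to dicts via dataclasses.asdict before the
# history is handed to the graph agent.
history = [
    gr.ChatMessage(role="user", content="Which funders support food banks in Georgia?"),
    {"role": "assistant", "content": "Here are a few funders to look into."},
]
messages = convert_history_for_graph_agent(history)  # -> list[dict]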
ask_candid/services/knowledge_base.py
CHANGED

@@ -118,8 +118,13 @@ def generate_queries(

     for source_name in sources:
         if source_name == "Candid Blog":
-            q =
-
+            q = build_sparse_vector_and_text_query(
+                query=query,
+                semantic_fields=S.CandidBlogConfig.semantic_fields,
+                text_fields=S.CandidBlogConfig.text_fields,
+                highlight_fields=S.CandidBlogConfig.highlight_fields,
+                excluded_fields=S.CandidBlogConfig.excluded_fields
+            )
             q["size"] = 5
             vector_queries.extend([{"index": S.CandidBlogConfig.index_name}, q])
         elif source_name == "Candid Help":
@@ -289,7 +294,7 @@ def reranker(
         text = '\n'.join(highlight_texts)
         texts.append(text)

-    if search_text and len(texts) == len(results) and len(texts) >
+    if search_text and len(texts) == len(results) and len(texts) > max_num_results:
         logger.info("Re-ranking %d retrieval results", len(results))
         scores = sparse_encoder.query_reranking(query=search_text, documents=texts)
         for r, s in zip(results, scores):
@@ -361,14 +366,13 @@ def process_hit(hit: ElasticHitsResult) -> Document:
                 "url": f"https://www.youtube.com/watch?v={hit.source['video_id']}"
             }
         )
-    elif "
+    elif "blog" in hit.index:
+        highlight = hit.highlight or {}
         doc = Document(
             page_content='\n\n'.join([
-                hit.source.get("
-
-
-                get_context("authors_text", hit, context_length=12, add_context=False),
-                hit.source.get("title_summary_tags", "")
+                hit.source.get("title_summary_tags_text", ""),
+                ' '.join(highlight.get("semantic_content", [])),
+                hit.source.get("authors_text", "")
             ]),
             metadata={
                 "title": hit.source.get("title", ""),
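The [{"index": ...}, q] pairing that the blog branch extends vector_queries with appears to follow the Elasticsearch multi-search layout (a header line naming the index followed by the query body); a rough sketch with a placeholder query, since the real body comes from build_sparse_vector_and_text_query:

# Placeholder query body for illustration; the real one is produced by
# build_sparse_vector_and_text_query from the CandidBlogConfig fields.
q = {"query": {"match_all": {}}, "size": 5}
vector_queries = []
vector_queries.extend([{"index": "search-semantic-blog"}, q])  # header + body pair, msearch-style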
ask_candid/tools/general.py
CHANGED

@@ -5,8 +5,16 @@ from langchain_core.tools import tool

 @tool
 def get_current_day() -> date:
-    """Get the current day to reference for any time-sensitive data requests.
-
+    """Get the current day to reference for any time-sensitive data requests.
+
+    ALWAYS call this tool:
+    * At the beginning of conversations involving dates or timelines
+    * Before searching news or time-sensitive data
+    * When interpreting or presenting any temporal information (recent, upcoming, last year, etc.)
+    * Before making statements about when events occurred or will occur
+
+    Never assume the correct date.
+    If data only includes partial date information, use this tool to infer missing details.

     Returns
     -------
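Outside the agent the decorated function is still an ordinary LangChain tool and can be exercised directly; a minimal check (no arguments are required):

from ask_candid.tools.general import get_current_day

# Invoking with an empty argument dict returns a datetime.date, matching the
# annotated return type above.
today = get_current_day.invoke({})
print(today.isoformat())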
ask_candid/tools/grants.py
CHANGED

@@ -1,3 +1,4 @@
+from typing import TypedDict, Annotated
 import logging

 from langchain_core.tools import tool
@@ -11,13 +12,28 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.ERROR)


+class GrantRecord(TypedDict):
+    funder_id: Annotated[str, "Unique Candid ID value for the funder of the grant"]
+    funder_profile_link: Annotated[str, "Link to the Candid profile for the funder of the grant"]
+    funder_name: Annotated[str, "Name of the funder of the grant"]
+    recipient_id: Annotated[str, "Unique Candid ID value for the recipient of the grant"]
+    recipient_profile_link: Annotated[str, "Link to the Candid profile for the recipient of the grant"]
+    recipient_name: Annotated[str, "Name of the recipient of the grant"]
+    fiscal_year: Annotated[int | float, "Fiscal year that the grant was awarded"]
+    amount_usd: Annotated[int | float, "Dollar amount of the grant awarded in USD"]
+    description: Annotated[str, "Description of the purpose of the grant"]
+    working_on: Annotated[str, "Description of the subject purpose of the grant"]
+    serving: Annotated[str, "Description of the population groups served by the grant"]
+
+
 @tool
 def grants_search(
     query: str,
     subject_codes: str | None = None,
     populations_served_codes: str | None = None,
     geonameids_of_geographies_served: str | None = None
-) -> list[dict[str, str | int | float | None]] | str:
+# ) -> list[dict[str, str | int | float | None]] | str:
+) -> list[GrantRecord] | str:
     """Search for historical grants to find context about what is happening in the sector, and what organizations are
     involved with. This is intended for historial research purposes and contextualization. If trying to recommend
     funders then please use the dedicated funder recommendation tool instead of this. Funder recommendations uses grants
@@ -62,7 +78,7 @@ def grants_search(

     Returns
     -------
-    list[
+    list[GrantRecord] | str
         Array of relevant grants and information about the organizations involved
         If output is a string then that means there was some error, and retry should be considered
     """
@@ -97,17 +113,34 @@ def grants_search(
         elif facet == "population":
             serving.extend([code["name"].lower() for code in data["value"]])

-        output.append({
-            "funder_id": grant["grantmakerId"],
-            "funder_profile_link": format_candid_profile_link(grant["grantmakerId"]),
-            "funder_name": grant["grantmakerName"],
-            "recipient_id": grant["recipientId"],
-            "recipient_profile_link": format_candid_profile_link(grant["recipientId"]),
-            "recipient_name": grant["recipientName"],
-            "fiscal_year": grant["fiscalYear"],
-            "amound_usd": grant["amountUsd"],
-            "description": grant["text"],
-            "working_on": f"Working on {', '.join(working_on)}",
-            "serving": f"Serving population groups {', '.join(serving)}",
-        })
+        # output.append({
+        #     "funder_id": grant["grantmakerId"],
+        #     "funder_profile_link": format_candid_profile_link(grant["grantmakerId"]),
+        #     "funder_name": grant["grantmakerName"],
+        #     "recipient_id": grant["recipientId"],
+        #     "recipient_profile_link": format_candid_profile_link(grant["recipientId"]),
+        #     "recipient_name": grant["recipientName"],
+        #     "fiscal_year": grant["fiscalYear"],
+        #     "amound_usd": grant["amountUsd"],
+        #     "description": grant["text"],
+        #     "working_on": f"Working on {', '.join(working_on)}",
+        #     "serving": f"Serving population groups {', '.join(serving)}",
+        # })
+
+        output.append(GrantRecord(
+            funder_id=grant["grantmakerId"],
+            funder_profile_link=format_candid_profile_link(grant["grantmakerId"]),
+            funder_name=grant["grantmakerName"],
+            recipient_id=grant["recipientId"],
+            recipient_profile_link=format_candid_profile_link(grant["recipientId"]),
+            recipient_name=grant["recipientName"],
+            fiscal_year=grant["fiscalYear"],
+            amount_usd=grant["amountUsd"],
+            description=grant["text"],
+            working_on=f"Working on {', '.join(working_on)}",
+            serving=f"Serving population groups {', '.join(serving)}"
+        ))
+
+    if not output:
+        return "No grants were found, try a different search strategy."
     return output
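Because GrantRecord is a TypedDict, the tool still returns plain dicts at runtime; the Annotated metadata only documents each field for the model. A hypothetical record built with placeholder values:

from ask_candid.tools.grants import GrantRecord

# All values below are illustrative placeholders, not real Candid data.
record = GrantRecord(
    funder_id="1234567",
    funder_profile_link="https://example.org/profile/1234567",  # placeholder link format
    funder_name="Example Foundation",
    recipient_id="7654321",
    recipient_profile_link="https://example.org/profile/7654321",  # placeholder link format
    recipient_name="Example Nonprofit",
    fiscal_year=2023,
    amount_usd=50000,
    description="General operating support",
    working_on="Working on food security",
    serving="Serving population groups children and youth"
)
assert isinstance(record, dict)  # TypedDict constructors produce ordinary dicts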
ask_candid/tools/org_search.py
CHANGED

@@ -20,12 +20,13 @@ class OrganizationNames(BaseModel):


 class OrganizationIdentifierArgs(BaseModel):
+    """Input arguments for the organization identifier tool"""
     text: str = Field(..., description="Chat model response text which contains named organizations.")


 class OrganizationIdentifier(BaseTool):
     llm: BaseChatModel
-    parser:
+    parser: PydanticOutputParser = PydanticOutputParser(pydantic_object=OrganizationNames)
     template: str = """Extract only the names of officially recognized organizations, foundations, and government
 entities from the text below. Do not include any entries that contain descriptions, regional identifiers, or
 explanations within parentheses or following the name. Strictly exclude databases, resources, crowdfunding
@@ -35,11 +36,19 @@ class OrganizationIdentifier(BaseTool):
     output format: ```{format_instructions}```
     """

-    name: str = "
+    name: str = "organization_identifier"
     description: str = """
 Identify the names of nonprofits and foundations from chat model responses. If it is likely that a response contains
 proper names then it should be processed through this tool.

+Some tools have outputs with organizations already identified by Candid's data. These include:
+* grant search
+* organization search
+* funder recommendations
+* RFP recommendations
+
+If these tools are invoked then use the IDs and profile URLs provided in their outputs, and DO NOT use this tool.
+
 Examples
 --------
 >>> `organization_identifier('My Favorite Foundation awarded a grant to My Favorite Nonprofit.')`
@@ -55,12 +64,12 @@ class OrganizationIdentifier(BaseTool):
         )
         return RunnableSequence(prompt, self.llm, self.parser)

-    def _run(self, text: str) -> str:
+    def _run(self, text: str) -> list[str]:
         chain = self._build_pipeline()
         result: OrganizationNames = chain.invoke({"chatbot_output": text})
         return result.orgnames

-    async def _arun(self, text: str) -> str:
+    async def _arun(self, text: str) -> list[str]:
         chain = self._build_pipeline()
         result: OrganizationNames = await chain.ainvoke({"chatbot_output": text})
         return result.orgnames
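A minimal wiring sketch for the tool, assuming the ChatBedrock client from langchain_aws as used by build_execution_graph in chat_v2.py; with the parser now defaulted, only the chat model needs to be supplied, and the sample text is hypothetical:

import boto3
from langchain_aws import ChatBedrock  # assumed import path for the Bedrock chat model
from ask_candid.base.config.models import BedrockEndpoints
from ask_candid.tools.org_search import OrganizationIdentifier

llm = ChatBedrock(
    client=boto3.client("bedrock-runtime", region_name="us-east-1"),
    model=BedrockEndpoints.claude_35_haiku.value
)
org_tool = OrganizationIdentifier(llm=llm)  # parser defaults to PydanticOutputParser(OrganizationNames)
names = org_tool.invoke("My Favorite Foundation awarded a grant to My Favorite Nonprofit.")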
ask_candid/tools/recommendations.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import TypedDict, Literal, Annotated, Any
 import logging

 from langchain_core.tools import tool
@@ -13,6 +13,24 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.ERROR)


+class OrganizationRecord(TypedDict):
+    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
+    name: Annotated[str, "Name of the organization"]
+    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
+    acronym: Annotated[str, "Acronym of the name of the organization"]
+    city: Annotated[str, "City that the organization is located in"]
+    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
+    country: Annotated[str, "Country that the organization is located in"]
+    ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
+    profile_link: Annotated[str, "Link to the Candid profile for the organization"]
+    working_on: Annotated[str, "Description of the subject purpose of the organization"]
+    serving: Annotated[str, "Description of the population groups served by the organization"]
+    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
+    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
+    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
+    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]
+
+
 @retry_on_status(num_retries=3)
 def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
     with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
@@ -25,7 +43,7 @@ def organization_search(
     located_postal_code: str | None = None,
     located_admin1: str | None = None,
     search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
-) -> list[
+) -> list[OrganizationRecord] | str:
     """Search for organizations by name, description or work, program descriptions and locations. Here are some
     guidelines:
     * `query` controls hybrid searching involving both vector search and keyword search
@@ -54,7 +72,7 @@ def organization_search(

     Returns
     -------
-    list[
+    list[OrganizationRecord] | str
         List of the top organization search results
         If output is a string then that means there was some error, and retry should be considered
     """
@@ -90,23 +108,40 @@ def organization_search(
         elif code.startswith('S'):
             working_on.append(description.lower())

-        output.append({
-            "nonprofit_id": org["candidEntityID"],
-            "name": org["orgName"],
-            "aka_name": org["akaName"],
-            "acronym": org["acronymName"],
-            "city": org["city"],
-            "admin1": org["admin1"],
-            "country": org["countryName"],
-            "EIN": org["ein"],
-            "profile_link": format_candid_profile_link(org['candidEntityID']),
-            "working_on": f"Working on {', '.join(working_on)}",
-            "serving": f"Serving population groups {', '.join(serving)}",
-            "transparency_level": org["seal"].get("description"),
-            "organization_roles": ', '.join(org["roles"]),
-            "grants_awarded": ', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
-            "grants_received": ', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
-        })
+        # output.append({
+        #     "nonprofit_id": org["candidEntityID"],
+        #     "name": org["orgName"],
+        #     "aka_name": org["akaName"],
+        #     "acronym": org["acronymName"],
+        #     "city": org["city"],
+        #     "admin1": org["admin1"],
+        #     "country": org["countryName"],
+        #     "EIN": org["ein"],
+        #     "profile_link": format_candid_profile_link(org['candidEntityID']),
+        #     "working_on": f"Working on {', '.join(working_on)}",
+        #     "serving": f"Serving population groups {', '.join(serving)}",
+        #     "transparency_level": org["seal"].get("description"),
+        #     "organization_roles": ', '.join(org["roles"]),
+        #     "grants_awarded": ', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
+        #     "grants_received": ', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
+        # })
+        output.append(OrganizationRecord(
+            nonprofit_id=org["candidEntityID"],
+            name=org["orgName"],
+            aka_name=org["akaName"],
+            acronym=org["acronymName"],
+            city=org["city"],
+            admin1=org["admin1"],
+            country=org["countryName"],
+            ein=org["ein"],
+            profile_link=format_candid_profile_link(org['candidEntityID']),
+            working_on=f"Working on {', '.join(working_on)}",
+            serving=f"Serving population groups {', '.join(serving)}",
+            transparency_level=org["seal"].get("description"),
+            organization_roles=', '.join(org["roles"]),
+            grants_awarded=', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
+            grants_received=', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
+        ))
     return output


@@ -201,7 +236,7 @@ def recommend_funders(
         data.get("meta") or {},
         [{
             **r,
-            "
+            "profile_link": format_candid_profile_link(r['funder_id'])
         } for r in (data.get("recommendations") or [])]
     )

@@ -282,6 +317,6 @@ def recommend_funding_opportunities(
         data.get("meta") or {},
         [{
             **r,
-            "
+            "profile_link": format_candid_profile_link(r['funder_id'])
         } for r in (data.get("recommendations") or [])]
     )
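Callers of organization_search (like grants_search) get either a list of records or an error string; a hypothetical invocation showing how that contract can be handled:

from ask_candid.tools.recommendations import organization_search

results = organization_search.invoke({"query": "community food banks"})
if isinstance(results, str):
    # An error string signals the search failed and a retry may be worthwhile.
    print(results)
else:
    for org in results:  # list[OrganizationRecord]
        print(org["name"], org["profile_link"])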
ask_candid/utils.py
CHANGED

@@ -1,5 +1,4 @@
 from typing import Any
-from uuid import uuid4

 from langchain_core.documents import Document

@@ -51,13 +50,3 @@ def format_chat_ag_response(chatbot: list[Any]) -> list[Any]:
     chatbot.pop(-1)
     chatbot[-1]["content"] = chatbot[-1]["content"] + sources
     return chatbot
-
-
-def valid_inputs(*args) -> bool:
-    return any(a is not None or (isinstance(a, str) and a.strip() != '') for a in args)
-
-
-def get_session_id(thread_id: str | None) -> str:
-    if not thread_id:
-        thread_id = uuid4().hex
-    return thread_id
chat_v2.py
CHANGED

@@ -14,7 +14,7 @@ from ask_candid.tools.search import search_candid_knowledge_base
 from ask_candid.tools.general import get_current_day
 from ask_candid.utils import html_format_docs_chat
 from ask_candid.base.config.constants import START_SYSTEM_PROMPT
-from ask_candid.base.config.models import
+from ask_candid.base.config.models import BedrockEndpoints
 from ask_candid.chat import convert_history_for_graph_agent, format_tool_call, format_tool_response

 try:
@@ -40,7 +40,7 @@ class LoggedComponents(TypedDict):
 def build_execution_graph() -> CompiledStateGraph:
     llm = ChatBedrock(
         client=boto3.client("bedrock-runtime", region_name="us-east-1"),
-        model=
+        model=BedrockEndpoints.claude_35_haiku.value
     )
     org_name_recognition = OrganizationIdentifier(llm=llm)  # bind the main chat model to the tool
     return create_react_agent(
@@ -82,14 +82,14 @@ async def execute(
         if fname.endswith('.txt'):
             with open(fname, 'r', encoding='utf8') as f:
                 history.append(gr.ChatMessage(role="user", content=f.read()))
-    yield gr.MultimodalTextbox(value=None, interactive=True), history

     horizon = len(history)
-
+    history.append(gr.ChatMessage(role="assistant", content=""))
+    yield gr.MultimodalTextbox(value=None, interactive=True), history

+    inputs = {"messages": convert_history_for_graph_agent(history)}
     graph = build_execution_graph()

-    history.append(gr.ChatMessage(role="assistant", content=""))
     async for stream_mode, chunk in graph.astream(inputs, stream_mode=["messages", "tasks"]):
         if stream_mode == "messages" and chunk[0].content:
             for msg in chunk[0].content:
@@ -165,7 +165,7 @@ def build_chat_app():
             None,  # user
             BOT_LOGO,  # bot
         ),
-        height="
+        height="60vh",
         type="messages",
         show_label=False,
         show_copy_button=True,