brainsqueeze's picture
v3 (#2)
f5c9c80 verified
from typing import TypedDict, Annotated
import logging
from langchain_core.tools import tool
import httpx
from ask_candid.tools.utils import format_candid_profile_link
from ask_candid.base.config.rest import SEARCH
# Install a root handler with a uniform message layout, then restrict this
# module's own logger to ERROR and above so only failures are reported.
_LOG_FORMAT = "[%(levelname)s] (%(asctime)s) :: %(message)s"

logging.basicConfig(format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
class GrantRecord(TypedDict):
funder_id: Annotated[str, "Unique Candid ID value for the funder of the grant"]
funder_profile_link: Annotated[str, "Link to the Candid profile for the funder of the grant"]
funder_name: Annotated[str, "Name of the funder of the grant"]
recipient_id: Annotated[str, "Unique Candid ID value for the recipient of the grant"]
recipient_profile_link: Annotated[str, "Link to the Candid profile for the recipient of the grant"]
recipient_name: Annotated[str, "Name of the recipient of the grant"]
fiscal_year: Annotated[int | float, "Fiscal year that the grant was awarded"]
amount_usd: Annotated[int | float, "Dollar amount of the grant awarded in USD"]
description: Annotated[str, "Description of the purpose of the grant"]
working_on: Annotated[str, "Description of the subject purpose of the grant"]
serving: Annotated[str, "Description of the population groups served by the grant"]
def _split_codes(codes: str | None) -> list[str]:
    """Split a comma-separated string into stripped, non-empty codes."""
    if not codes:
        return []
    return [code.strip() for code in codes.split(",") if code.strip()]


def _pcs_names(grant: dict, facet: str) -> list[str]:
    """Return the lower-cased code names listed under one `pcsV3` facet of a grant, or [] if absent."""
    facet_data = (grant.get("pcsV3") or {}).get(facet) or {}
    return [code["name"].lower() for code in facet_data.get("value") or []]


@tool
def grants_search(
    query: str,
    subject_codes: str | None = None,
    populations_served_codes: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> list[GrantRecord] | str:
    """Search for historical grants to find context about what is happening in the sector, and what organizations are
    involved with. This is intended for historical research purposes and contextualization. If trying to recommend
    funders then please use the dedicated funder recommendation tool instead of this. Funder recommendations uses grants
    and additional contexts, as well as a carefully trained graph neural network to provide targeted recommendations.

    Another important note is that this tool only returns up to 25 top relevant grant results and should never be used
    to make broad generalizations.

    Queries are natural text, and the retrieval mechanism is a hybrid approach of keywords and sparse vector searches
    over fields which describe the activity and purpose of the grant.

    While extra subject codes, populations served codes, and geography IDs for where the grant is serving is not
    required, grants may become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
    they are seeking funding for.

    Parameters
    ----------
    query : str
        Text describing a user's question or a description of investigative work which requires support from Candid's
        grants knowledge base
    subject_codes : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Examples
    --------
    >>> grants_search(query='homeless shelters in new york')
    >>> grants_search(
        query='homeless shelters in new york',
        subject_codes='SS050000, SS000000,SB050000',
        populations_served_codes='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    list[GrantRecord] | str
        Array of relevant grants and information about the organizations involved
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, str | int | list[str]] = {"query": query, "rowCount": 25}

    # Every optional filter is handled the same way: whitespace around codes is dropped and a filter that ends up
    # empty is left out of the request instead of being sent as [""].
    optional_filters = {
        "SubjectArea": subject_codes,
        "PopulationServed": populations_served_codes,
        "GeographicArea": geonameids_of_geographies_served,
    }
    for param, raw_codes in optional_filters.items():
        codes = _split_codes(raw_codes)
        if codes:
            payload[param] = codes

    # Transport failures (timeouts, connection errors) are reported as an error string, matching the documented
    # contract that a string result signals a failed call.
    try:
        with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
            r = client.get(
                url=SEARCH.endpoint("v1/grants/discovery"),
                params=payload,
                headers={**SEARCH.header}  # type: ignore
            )
    except httpx.HTTPError as ex:
        logger.error("Error calling grants search API. Error: %s", ex)
        return f"Error calling grants search. Error: {ex}"

    if r.status_code != 200:
        logger.error("Error calling grants search API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling grants search. Error: {r.reason_phrase}"

    try:
        data: dict = r.json()
    except ValueError as ex:
        # A 200 response whose body is not valid JSON is still a failed call from the caller's point of view.
        logger.error("Error decoding grants search API response %s. Error: %s", str(r.request.url), ex)
        return f"Error calling grants search. Error: {ex}"

    output = []
    for grant in data.get("grants") or []:
        working_on = _pcs_names(grant, "subject")
        serving = _pcs_names(grant, "population")

        output.append(GrantRecord(
            funder_id=grant["grantmakerId"],
            funder_profile_link=format_candid_profile_link(grant["grantmakerId"]),
            funder_name=grant["grantmakerName"],
            recipient_id=grant["recipientId"],
            recipient_profile_link=format_candid_profile_link(grant["recipientId"]),
            recipient_name=grant["recipientName"],
            fiscal_year=grant["fiscalYear"],
            amount_usd=grant["amountUsd"],
            description=grant["text"],
            working_on=f"Working on {', '.join(working_on)}",
            serving=f"Serving population groups {', '.join(serving)}"
        ))

    if not output:
        return "No grants were found, try a different search strategy."
    return output