from typing import TypedDict, Annotated
import logging

from langchain_core.tools import tool
import httpx

from ask_candid.tools.utils import format_candid_profile_link
from ask_candid.base.config.rest import SEARCH

logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)


class GrantRecord(TypedDict):
    """Flattened view of a single grant as returned by `grants_search`."""

    funder_id: Annotated[str, "Unique Candid ID value for the funder of the grant"]
    funder_profile_link: Annotated[str, "Link to the Candid profile for the funder of the grant"]
    funder_name: Annotated[str, "Name of the funder of the grant"]
    recipient_id: Annotated[str, "Unique Candid ID value for the recipient of the grant"]
    recipient_profile_link: Annotated[str, "Link to the Candid profile for the recipient of the grant"]
    recipient_name: Annotated[str, "Name of the recipient of the grant"]
    fiscal_year: Annotated[int | float, "Fiscal year that the grant was awarded"]
    amount_usd: Annotated[int | float, "Dollar amount of the grant awarded in USD"]
    description: Annotated[str, "Description of the purpose of the grant"]
    working_on: Annotated[str, "Description of the subject purpose of the grant"]
    serving: Annotated[str, "Description of the population groups served by the grant"]


def _split_codes(codes: str | None) -> list[str]:
    """Split a comma separated string into whitespace-stripped, non-empty values."""
    if not codes:
        return []
    return [part.strip() for part in codes.split(",") if part.strip()]


@tool
def grants_search(
    query: str,
    subject_codes: str | None = None,
    populations_served_codes: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> list[GrantRecord] | str:
    """Search for historical grants to find context about what is happening in the sector, and which organizations
    are involved. This is intended for historical research purposes and contextualization. If trying to recommend
    funders then please use the dedicated funder recommendation tool instead of this. Funder recommendations uses
    grants and additional contexts, as well as a carefully trained graph neural network to provide targeted
    recommendations.

    Another important note is that this tool only returns up to 25 top relevant grant results and should never be
    used to make broad generalizations.

    Queries are natural text, and the retrieval mechanism is a hybrid approach of keywords and sparse vector searches
    over fields which describe the activity and purpose of the grant.

    While extra subject codes, populations served codes, and geography IDs for where the grant is serving is not
    required, grants may become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the
    program they are seeking funding for.

    Parameters
    ----------
    query : str
        Text describing a user's question or a description of investigative work which requires support from
        Candid's grants knowledge base
    subject_codes : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Whitespace around comma separated values is ignored, and empty values are dropped.

    Examples
    --------
    >>> grants_search(query='homeless shelters in new york')
    >>> grants_search(
    ...     query='homeless shelters in new york',
    ...     subject_codes='SS050000, SS000000,SB050000',
    ...     populations_served_codes='PJ050100',
    ...     geonameids_of_geographies_served='4094212,4094212'
    ... )

    Returns
    -------
    list[GrantRecord] | str
        Array of relevant grants and information about the organizations involved
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, str | int | list[str]] = {"query": query, "rowCount": 25}

    # Only send a filter when it holds at least one real code; all three filters are treated the same way so that
    # `None`, an empty string, or stray commas never produce an empty-string filter value.
    filters = (
        ("SubjectArea", subject_codes),
        ("PopulationServed", populations_served_codes),
        ("GeographicArea", geonameids_of_geographies_served),
    )
    for param, raw_codes in filters:
        if codes := _split_codes(raw_codes):
            payload[param] = codes

    try:
        with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
            r = client.get(
                url=SEARCH.endpoint("v1/grants/discovery"),
                params=payload,
                headers={**SEARCH.header} # type: ignore
            )
    except httpx.HTTPError as exc:
        # Transport failures and timeouts are reported as a string so the documented "string means error, consider
        # retrying" contract also holds when no HTTP response was received at all.
        logger.error("Error calling grants search API. Error: %s", exc)
        return f"Error calling grants search. Error: {exc}"

    if r.status_code != 200:
        logger.error("Error calling grants search API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling grants search. Error: {r.reason_phrase}"

    try:
        data: dict = r.json()
    except ValueError as exc:
        # `json.JSONDecodeError` is a `ValueError`; a non-JSON body is treated like any other API error.
        logger.error("Error decoding grants search API response %s. Error: %s", str(r.request.url), exc)
        return f"Error calling grants search. Error: {exc}"

    output: list[GrantRecord] = []
    for grant in data.get("grants") or []:
        working_on: list[str] = []
        serving: list[str] = []

        # A distinct name is used for the facet payload so the response `data` is not shadowed.
        for facet, facet_data in grant["pcsV3"].items():
            if facet == "subject":
                working_on.extend(code["name"].lower() for code in facet_data["value"])
            elif facet == "population":
                serving.extend(code["name"].lower() for code in facet_data["value"])

        output.append(GrantRecord(
            funder_id=grant["grantmakerId"],
            funder_profile_link=format_candid_profile_link(grant["grantmakerId"]),
            funder_name=grant["grantmakerName"],
            recipient_id=grant["recipientId"],
            recipient_profile_link=format_candid_profile_link(grant["recipientId"]),
            recipient_name=grant["recipientName"],
            fiscal_year=grant["fiscalYear"],
            amount_usd=grant["amountUsd"],
            description=grant["text"],
            working_on=f"Working on {', '.join(working_on)}",
            serving=f"Serving population groups {', '.join(serving)}"
        ))

    if not output:
        return "No grants were found, try a different search strategy."
    return output