from typing import Any
import logging

from langchain_core.tools import tool
import httpx

from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import AUTOCODING, DOCUMENT

logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

# Taxonomy facets that `autocode` passes through to the caller; every other
# key of the API payload is dropped.
_AUTOCODE_FACETS = frozenset({"subject", "population"})


@retry_on_status(num_retries=3)
def get_with_retries(
    url: str,
    payload: dict[str, Any] | None,
    headers: dict[str, str] | None
) -> httpx.Response:
    """Issue a GET request through a short-lived `httpx` client.

    The client uses a transport configured with 3 connection retries and a
    30 second timeout. The function is additionally wrapped by
    `retry_on_status`; what the wrapped callable ultimately returns depends on
    that decorator, so callers should verify they received a `httpx.Response`.

    Parameters
    ----------
    url : str
        Endpoint to call.
    payload : dict[str, Any] | None
        Query-string parameters.
    headers : dict[str, str] | None
        Request headers.

    Returns
    -------
    httpx.Response
    """

    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
        return client.get(url=url, params=payload, headers=headers)


@tool
def autocode(text: str) -> dict[str, list] | str:
    """Uses natural language processing to align some input text to Candid's taxonomy: https://taxonomy.candid.org.
    The taxonomy describes activity in the social and philanthropic sectors.

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    dict[str, list] | str
        Taxonomy responses. The keys of the dictionary are individual taxonomy facets, and the items in the dictionary
        are each term which the NLP model has determined is relevant given the input text. This also includes
        confidence scores. If output is a string then that means there was some error, and retry should be considered
    """

    # Go through the shared helper so this tool gets the same retry and timeout
    # behaviour as `geo_detect` instead of a single bare request.
    r = get_with_retries(
        url=AUTOCODING.endpoint("predict"),
        payload={"text": text},
        headers={**AUTOCODING.header}  # type: ignore
    )

    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not isinstance(r, httpx.Response):
        logger.error("Error calling autocoding API. No response was returned")
        return "Error calling autocoding. Error: no response was returned"

    if r.status_code != 200:
        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling autocoding. Error: {r.reason_phrase}"

    # `or {}` also covers an explicit JSON null under "data".
    data: dict[str, list] = r.json().get("data") or {}
    return {k: v for k, v in data.items() if k in _AUTOCODE_FACETS}


@tool
def geo_detect(text: str) -> list[dict[str, Any]] | str:
    """Uses natural language processing to find and match named geographies found in the supplied text. The output
    will supply identified geographies from [Geonames](https://www.geonames.org/).

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    list[dict[str, Any]] | str
        Matched geographies responses. This is an array of JSON objects which contain the `name` of the geography as it
        appeared in the supplied text, and the best match to a Geonames geography. For many Candid knowledge tools the
        `geonames_id` value will be most useful.
        If output is a string then that means there was some error, and retry should be considered
    """

    r = get_with_retries(
        url=DOCUMENT.endpoint("entities/geographies"),
        payload={"text": text, "only_best_match": True},
        headers={**DOCUMENT.header}
    )

    # Explicit check rather than `assert`, which is stripped under `python -O`
    # and would otherwise surface as an AttributeError further down.
    if not isinstance(r, httpx.Response):
        logger.error("Error calling geo detection API. No response was returned")
        return "Error calling geo detection. Error: no response was returned"

    if r.status_code != 200:
        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling geo detection. Error: {r.reason_phrase}"

    # The payload is a list of entity objects; `or []` also covers JSON null.
    entities: list[dict[str, Any]] = r.json().get("entities") or []
    return [
        # Keep at most the first candidate in "match".
        {"name": entity["name"], "match": entity["match"][:1]}
        for entity in entities
        if entity.get("type") == "geo"
    ]