Spaces:
Running
Running
| from typing import Any | |
| import logging | |
| from langchain_core.tools import tool | |
| import httpx | |
| from ask_candid.base.utils import retry_on_status | |
| from ask_candid.base.config.rest import AUTOCODING, DOCUMENT | |
# Install the root log format at import time (a no-op when the root logger already has handlers), then keep this
# module's own logger at ERROR so only failures are emitted from it.
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
def get_with_retries(
    url: str,
    payload: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> httpx.Response:
    """Send a GET request through a short-lived client with connection retries and a 30 second timeout.

    Parameters
    ----------
    url : str
        Fully-qualified URL to request.
    payload : dict[str, Any] | None, optional
        Values sent as query-string parameters, by default None
    headers : dict[str, str] | None, optional
        Extra request headers, by default None

    Returns
    -------
    httpx.Response
        The response exactly as received. The status code is NOT inspected here; callers must check it themselves.

    Notes
    -----
    The `retries=3` transport setting only re-attempts establishing the connection. Read/write failures, timeouts
    after connecting, and non-2xx statuses are not retried, and transport errors still propagate to the caller.
    """
    transport = httpx.HTTPTransport(retries=3)
    # The context manager guarantees the connection pool is released even when the request raises.
    with httpx.Client(transport=transport, timeout=30) as client:
        return client.get(url=url, params=payload, headers=headers)
def autocode(text: str) -> dict[str, list] | str:
    """Uses natural language processing to align some input text to Candid's taxonomy: https://taxonomy.candid.org.
    The taxonomy describes activity in the social and philanthropic sectors.

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    dict[str, list] | str
        Taxonomy responses. The keys of the dictionary are individual taxonomy facets, and the items in the dictionary
        are each term which the NLP model has determined is relevant given the input text. This also includes
        confidence score.
        If output is a string then that means there was some error, and retry should be considered
    """

    try:
        # Go through the shared helper so this call gets the same connection retries and timeout as `geo_detect`.
        r = get_with_retries(
            url=AUTOCODING.endpoint("predict"),
            payload={"text": text},
            headers={**AUTOCODING.header},  # type: ignore
        )
    except httpx.HTTPError as exc:
        # Transport failures (timeouts, refused connections, ...) are reported the same way as bad statuses:
        # as a string, so the caller sees one uniform error contract.
        logger.exception("Error calling autocoding API")
        return f"Error calling autocoding. Error: {type(exc).__name__}: {exc}"

    if r.status_code != 200:
        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling autocoding. Error: {r.reason_phrase}"

    # `or {}` also covers an explicit `"data": null` in the response body.
    data: dict[str, list] = r.json().get("data") or {}
    wanted_facets = {"subject", "population"}
    return {facet: terms for facet, terms in data.items() if facet in wanted_facets}
def geo_detect(text: str) -> list[dict[str, Any]] | str:
    """Uses natural language processing to find and match named geographies found in the supplied text. The output
    will supply identified geographies from [Geonames](https://www.geonames.org/).

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    list[dict[str, Any]] | str
        Matched geographies responses. This is an array of JSON objects which contain the `name` of the geography as it
        appeared in the supplied text, and the best match to a Geonames geography. For many Candid knowledge tools the
        `geonames_id` value will be most useful.
        If output is a string then that means there was some error, and retry should be considered
    """

    try:
        r = get_with_retries(
            url=DOCUMENT.endpoint("entities/geographies"),
            payload={"text": text, "only_best_match": True},
            headers={**DOCUMENT.header},
        )
    except httpx.HTTPError as exc:
        # Transport failures (timeouts, refused connections, ...) are reported the same way as bad statuses:
        # as a string, matching the error contract described in the docstring.
        logger.exception("Error calling geo detection API")
        return f"Error calling geo detection. Error: {type(exc).__name__}: {exc}"

    # Type-narrowing guard for static checkers.
    assert isinstance(r, httpx.Response)
    if r.status_code != 200:
        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling geo detection. Error: {r.reason_phrase}"

    # `or []` also covers an explicit `"entities": null` in the response body.
    entities: list[dict[str, Any]] = r.json().get("entities") or []
    # Keep only geography entities, and at most the first candidate match for each.
    return [
        {"name": entity["name"], "match": entity["match"][:1]}
        for entity in entities
        if entity.get("type") == "geo"
    ]