brainsqueeze's picture
Update issuelab query technique
68e9b80 verified
from typing import Any
import logging
from langchain_core.tools import tool
import httpx
from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import AUTOCODING, DOCUMENT
# Set the log line format via the root logger configuration:
# "[LEVEL] (timestamp) :: message".
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
# Module-level logger shared by the functions in this file; its threshold is
# set to ERROR, so only error-level (and above) records are emitted through it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
@retry_on_status(num_retries=3)
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None):
    """Send a GET request using a short-lived `httpx.Client`.

    The client is created with an `httpx.HTTPTransport(retries=3)` transport and a timeout of 30, and is closed
    again before this function returns.

    Parameters
    ----------
    url : str
        Address to request.
    payload : dict[str, Any] | None
        Values sent as the query-string parameters of the request.
    headers : dict[str, str] | None
        HTTP headers sent with the request.

    Returns
    -------
    The response object produced by `httpx.Client.get`.
    NOTE(review): the function is wrapped by `retry_on_status(num_retries=3)`; presumably that decorator re-issues
    the call on unsuccessful status codes and may alter what callers receive -- confirm in `ask_candid.base.utils`.
    """
    transport = httpx.HTTPTransport(retries=3)
    with httpx.Client(transport=transport, timeout=30) as session:
        response = session.get(url=url, params=payload, headers=headers)
    return response
@tool
def autocode(text: str) -> dict[str, list] | str:
    """Uses natural language processing to align some input text to Candid's taxonomy: https://taxonomy.candid.org.
    The taxonomy describes activity in the social and philanthropic sectors.

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    dict[str, list] | str
        Taxonomy responses. The keys of the dictionary are individual taxonomy facets, and the items in the dictionary
        are each term which the NLP model has determined is relevant given the input text. This also includes
        confidence score.
        If output is a string then that means there was some error, and retry should be considered
    """
    # NOTE: the `@tool` decorator exposes the docstring above as the tool description, so implementation notes
    # are kept in comments rather than in the docstring.

    # NOTE(review): unlike `geo_detect`, this request does not go through `get_with_retries`, so it has no retry
    # handling and relies on httpx's default timeout -- confirm whether that difference is intentional.
    r = httpx.get(
        url=AUTOCODING.endpoint("predict"),
        params={"text": text},
        headers={**AUTOCODING.header}  # type: ignore
    )

    if r.status_code != 200:
        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling autocoding. Error: {r.reason_phrase}"

    # A 200 response with a non-JSON body is reported through the same string-error channel instead of raising.
    try:
        body = r.json()
    except ValueError:
        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), "invalid JSON response")
        return "Error calling autocoding. Error: invalid JSON response"

    # `data` may be missing or explicitly null; treat anything that is not a JSON object as "no predictions".
    data = body.get("data") if isinstance(body, dict) else None
    if not isinstance(data, dict):
        return {}

    # Only these two taxonomy facets are returned.
    wanted_facets = {"subject", "population"}
    return {k: v for k, v in data.items() if k in wanted_facets}
@tool
def geo_detect(text: str) -> list[dict[str, Any]] | str:
    """Uses natural language processing to find and match named geographies found in the supplied text. The output
    will supply identified geographies from [Geonames](https://www.geonames.org/).

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    list[dict[str, Any]] | str
        Matched geographies responses. This is an array of JSON objects which contain the `name` of the geography as it
        appeared in the supplied text, and the best match to a Geonames geography. For many Candid knowledge tools the
        `geonames_id` value will be most useful.
        If output is a string then that means there was some error, and retry should be considered
    """
    # NOTE: the `@tool` decorator exposes the docstring above as the tool description, so implementation notes
    # are kept in comments rather than in the docstring.
    r = get_with_retries(
        url=DOCUMENT.endpoint("entities/geographies"),
        payload={"text": text, "only_best_match": True},
        headers={**DOCUMENT.header}
    )

    # Explicit runtime check instead of `assert`: assertions are stripped under `python -O`, and the decorated
    # helper's return type is not visible here.
    if not isinstance(r, httpx.Response):
        logger.error("Error calling geo detection API. Error: %s", "no HTTP response received")
        return "Error calling geo detection. Error: no HTTP response received"

    if r.status_code != 200:
        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling geo detection. Error: {r.reason_phrase}"

    # A 200 response with a non-JSON body is reported through the same string-error channel instead of raising.
    try:
        body = r.json()
    except ValueError:
        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), "invalid JSON response")
        return "Error calling geo detection. Error: invalid JSON response"

    # `entities` is a list of objects; a missing/null value means nothing was detected.
    entities = body.get("entities") if isinstance(body, dict) else None
    if not isinstance(entities, list):
        return []

    # Keep only geography entities, and at most the first candidate of each entity's `match` list.
    # A missing or null `match` yields an empty list rather than raising.
    return [
        {"name": entity["name"], "match": (entity.get("match") or [])[:1]}
        for entity in entities
        if isinstance(entity, dict) and entity.get("type") == "geo"
    ]