Spaces:

CandidAI
/

ask-candid

Running

App Files Files Community

ask-candid / ask_candid /tools /nlp.py

brainsqueeze

Update issuelab query technique

68e9b80 verified about 2 months ago

raw

history blame contribute delete

3.32 kB

	from typing import Any
	import logging

	from langchain_core.tools import tool
	import httpx

	from ask_candid.base.utils import retry_on_status
	from ask_candid.base.config.rest import AUTOCODING, DOCUMENT

	# Module-level logger; only ERROR and above are emitted from this module.
	logger = logging.getLogger(__name__)
	logger.setLevel(logging.ERROR)
	# Root logging configuration: "[LEVEL] (timestamp) :: message". Per the stdlib, basicConfig does nothing if the
	# root logger already has handlers configured.
	logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")


	@retry_on_status(num_retries=3)
	def get_with_retries(
	    url: str,
	    payload: dict[str, Any] | None = None,
	    headers: dict[str, str] | None = None,
	) -> httpx.Response:
	    """Issue an HTTP GET request through a short-lived client configured with transport retries.

	    Parameters
	    ----------
	    url : str
	        Fully-qualified URL to request.
	    payload : dict[str, Any] | None, optional
	        Query-string parameters sent with the request, by default None
	    headers : dict[str, str] | None, optional
	        HTTP headers sent with the request, by default None

	    Returns
	    -------
	    httpx.Response
	        The response produced by the GET request. The status code is not inspected here; callers must check it.
	        NOTE(review): the `retry_on_status` decorator wraps this function and may alter what callers receive --
	        confirm against `ask_candid.base.utils`.
	    """

	    # Two retry layers: the transport-level `retries=3` is handled by httpx, while `retry_on_status`
	    # (presumably re-issuing the call based on the HTTP status, up to 3 times -- TODO confirm) wraps the whole call.
	    # The client is closed as soon as the request completes; the timeout is 30 seconds.
	    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
	        return client.get(url=url, params=payload, headers=headers)


	@tool
	def autocode(text: str) -> dict[str, list] | str:
	    """Uses natural language processing to align some input text to Candid's taxonomy: https://taxonomy.candid.org.
	    The taxonomy describes activity in the social and philanthropic sectors.

	    Parameters
	    ----------
	    text : str
	        Text describing working in the social sector. This should be related to the social and/or philanthropic sector.

	    Returns
	    -------
	    dict[str, list] | str
	        Taxonomy responses. The keys of the dictionary are individual taxonomy facets, and the items in the dictionary
	        are each term which the NLP model has determined is relevant given the input text. This also includes
	        a confidence score.
	        If output is a string then that means there was some error, and retry should be considered
	    """
	    # NOTE(review): the `@tool` decorator presumably exposes the docstring above as the tool description shown to
	    # the model, so its wording is part of the runtime behaviour -- edit it with care.

	    r = httpx.get(
	        url=AUTOCODING.endpoint("predict"),
	        params={"text": text},
	        headers={**AUTOCODING.header}  # type: ignore
	    )

	    # Any non-200 status is reported back to the caller as a plain string instead of raising.
	    if r.status_code != 200:
	        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), r.reason_phrase)
	        return f"Error calling autocoding. Error: {r.reason_phrase}"

	    # `or {}` also covers an explicit JSON null under "data", which would otherwise fail on `.items()`.
	    data: dict = r.json().get("data") or {}
	    # Only the subject and population facets are returned; any other keys in the payload are dropped.
	    return {k: v for k, v in data.items() if k in {"subject", "population"}}


	@tool
	def geo_detect(text: str) -> list[dict[str, Any]] | str:
	    """Uses natural language processing to find and match named geographies found in the supplied text. The output
	    will supply identified geographies from [Geonames](https://www.geonames.org/).

	    Parameters
	    ----------
	    text : str
	        Text describing working in the social sector. This should be related to the social and/or philanthropic sector.

	    Returns
	    -------
	    list[dict[str, Any]] | str
	        Matched geographies responses. This is an array of JSON objects which contain the `name` of the geography as it
	        appeared in the supplied text, and the best match to a Geonames geography. For many Candid knowledge tools the
	        `geonames_id` value will be most useful.
	        If output is a string then that means there was some error, and retry should be considered
	    """
	    # NOTE(review): the `@tool` decorator presumably exposes the docstring above as the tool description shown to
	    # the model, so its wording is part of the runtime behaviour -- edit it with care.

	    r = get_with_retries(
	        url=DOCUMENT.endpoint("entities/geographies"),
	        payload={"text": text, "only_best_match": True},
	        headers={**DOCUMENT.header}
	    )
	    # Explicit check rather than `assert`: assertions are stripped under `python -O`, and the documented contract
	    # of this tool is to report failures as a string rather than raise.
	    if not isinstance(r, httpx.Response):
	        logger.error("Error calling geo detection API. Unexpected result type: %s", type(r).__name__)
	        return "Error calling geo detection. Error: no HTTP response received"
	    if r.status_code != 200:
	        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), r.reason_phrase)
	        return f"Error calling geo detection. Error: {r.reason_phrase}"

	    # "entities" is iterated as a list of entity objects; `or []` also covers an explicit JSON null.
	    entities: list[dict[str, Any]] = r.json().get("entities") or []
	    # Keep only geography entities, and at most the first candidate of each entity's "match" sequence.
	    return [
	        {"name": entity["name"], "match": entity["match"][:1]}
	        for entity in entities
	        if entity.get("type") == "geo"
	    ]