brainsqueeze committed · verified
Commit f5c9c80 · 1 Parent(s): d916808
ask_candid/base/config/constants.py CHANGED
@@ -2,3 +2,16 @@ START_SYSTEM_PROMPT = (
     "You are a Candid subject matter expert on the social sector and philanthropy. "
     "You should address the user's queries and stay on topic."
 )
+
+TONE_PROMPT = (
+    "You must be cordial with the user. You should be helpful, but NEVER be sycophantic. "
+    "NEVER use extreme rhetoric such as 'perfect', 'excellent' or 'amazing'. "
+    "Be realistic, NEVER convey confidence when responding in a context with a high degree of uncertainty. "
+)
+
+FEEDBACK_PROMPT = (
+    "NEVER assume that your responses are sufficiently answering the user's inquiry. "
+    "Ask for clarification from the user if the intent is not clear. "
+    "Ask the user if responses are helpful/useful, "
+    "and if any further context is needed to hone in on a better response."
+)
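The two new prompt fragments read as composable additions to START_SYSTEM_PROMPT; a minimal sketch of how they might be joined into a single system prompt (the actual composition is not shown in this commit):

from ask_candid.base.config.constants import START_SYSTEM_PROMPT, TONE_PROMPT, FEEDBACK_PROMPT

# Hypothetical composition; the agent code may interleave these fragments differently.
system_prompt = "\n\n".join([START_SYSTEM_PROMPT, TONE_PROMPT, FEEDBACK_PROMPT])
print(system_prompt)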
ask_candid/base/config/models.py CHANGED
@@ -1,9 +1,21 @@
 from types import MappingProxyType
+from enum import Enum
+
+
+class BedrockEndpoints(Enum):
+    claude_35_haiku = "us.anthropic.claude-3-5-haiku-20241022-v1:0"
+    claude_4_sonnet = "us.anthropic.claude-sonnet-4-20250514-v1:0"
+    claude_45_sonnet = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
+    llama_31_70b_instruct = "us.meta.llama3-1-70b-instruct-v1:0"
+    mistral_large = "mistral.mistral-large-2402-v1:0"
+    mixtral_8x7b = "mistral.mixtral-8x7b-instruct-v0:1"
+
 
 Name2Endpoint = MappingProxyType({
     "gpt-4o": "gpt-4o",
     "claude-3.5-haiku": "us.anthropic.claude-3-5-haiku-20241022-v1:0",
     "claude-4-sonnet": "us.anthropic.claude-sonnet-4-20250514-v1:0",
+    "claude-4.5-sonnet": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
     # "llama-3.1-70b-instruct": "us.meta.llama3-1-70b-instruct-v1:0",
     # "mistral-large": "mistral.mistral-large-2402-v1:0",
     # "mixtral-8x7B": "mistral.mixtral-8x7b-instruct-v0:1",
ask_candid/base/retrieval/sources.py CHANGED
@@ -2,8 +2,11 @@ from ask_candid.base.retrieval.schemas import ElasticSourceConfig
 
 
 CandidBlogConfig = ElasticSourceConfig(
-    index_name="search-semantic-candid-blog",
-    semantic_fields=("content", "authors_text", "title_summary_tags")
+    index_name="search-semantic-blog",
+    semantic_fields=("semantic_title_summary_tags_text", "semantic_authors_text", "semantic_content"),
+    text_fields=("title", "summary", "content", "authors_text"),
+    highlight_fields=("semantic_content",),
+    excluded_fields=("content",)
 )
 
 
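The ElasticSourceConfig schema itself is not touched by this commit; the following is a hypothetical sketch of the shape implied by the keyword arguments used above:

from dataclasses import dataclass

# Assumed shape only; the real class lives in ask_candid/base/retrieval/schemas.py.
@dataclass(frozen=True)
class ElasticSourceConfigSketch:
    index_name: str
    semantic_fields: tuple[str, ...]
    text_fields: tuple[str, ...] = ()
    highlight_fields: tuple[str, ...] = ()
    excluded_fields: tuple[str, ...] = ()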
ask_candid/chat.py CHANGED
@@ -1,5 +1,5 @@
 from typing import TypedDict, Literal, Any
-from collections.abc import Iterator
+from collections.abc import Iterator, Sequence
 from dataclasses import asdict
 import logging
 import json
@@ -34,11 +34,11 @@ class ToolResult(TypedDict):
     interrupts: list
 
 
-def convert_history_for_graph_agent(history: list[dict | ChatMessage]) -> list[dict]:
+def convert_history_for_graph_agent(history: Sequence[dict | ChatMessage]) -> list[dict]:
     _hist = []
     for h in history:
         if isinstance(h, ChatMessage):
-            h = asdict(h)
+            h = asdict(h)  # noqa: PLW2901
 
         if h.get("content"):
             # if h.get("metadata"):
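With the Sequence hint, any sequence of dicts or ChatMessage dataclasses is accepted; a minimal usage sketch, assuming ChatMessage here is the gradio dataclass used elsewhere in the repo:

from gradio import ChatMessage
from ask_candid.chat import convert_history_for_graph_agent

history = (
    ChatMessage(role="user", content="Which funders support food banks?"),
    {"role": "assistant", "content": "Here are a few examples."},
)
# ChatMessage entries are converted via asdict(); dict entries pass through.
messages = convert_history_for_graph_agent(history)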
ask_candid/services/knowledge_base.py CHANGED
@@ -118,8 +118,13 @@ def generate_queries(
 
     for source_name in sources:
         if source_name == "Candid Blog":
-            q = build_sparse_vector_query(query=query, fields=S.CandidBlogConfig.semantic_fields)
-            q["_source"] = {"excludes": ["embeddings"]}
+            q = build_sparse_vector_and_text_query(
+                query=query,
+                semantic_fields=S.CandidBlogConfig.semantic_fields,
+                text_fields=S.CandidBlogConfig.text_fields,
+                highlight_fields=S.CandidBlogConfig.highlight_fields,
+                excluded_fields=S.CandidBlogConfig.excluded_fields
+            )
             q["size"] = 5
             vector_queries.extend([{"index": S.CandidBlogConfig.index_name}, q])
         elif source_name == "Candid Help":
@@ -289,7 +294,7 @@ def reranker(
             text = '\n'.join(highlight_texts)
             texts.append(text)
 
-    if search_text and len(texts) == len(results) and len(texts) > 1:
+    if search_text and len(texts) == len(results) and len(texts) > max_num_results:
         logger.info("Re-ranking %d retrieval results", len(results))
         scores = sparse_encoder.query_reranking(query=search_text, documents=texts)
         for r, s in zip(results, scores):
@@ -361,14 +366,13 @@ def process_hit(hit: ElasticHitsResult) -> Document:
                 "url": f"https://www.youtube.com/watch?v={hit.source['video_id']}"
             }
         )
-    elif "candid-blog" in hit.index:
+    elif "blog" in hit.index:
+        highlight = hit.highlight or {}
         doc = Document(
             page_content='\n\n'.join([
-                hit.source.get("title", ""),
-                hit.source.get("excerpt", ""),
-                get_context("content", hit, context_length=12, add_context=False),
-                get_context("authors_text", hit, context_length=12, add_context=False),
-                hit.source.get("title_summary_tags", "")
+                hit.source.get("title_summary_tags_text", ""),
+                ' '.join(highlight.get("semantic_content", [])),
+                hit.source.get("authors_text", "")
             ]),
             metadata={
                 "title": hit.source.get("title", ""),
ask_candid/tools/general.py CHANGED
@@ -5,8 +5,16 @@ from langchain_core.tools import tool
 
 @tool
 def get_current_day() -> date:
-    """Get the current day to reference for any time-sensitive data requests. This might be useful for information
-    searches through news data, where more current articles may be more relevant.
+    """Get the current day to reference for any time-sensitive data requests.
+
+    ALWAYS call this tool:
+    * At the beginning of conversations involving dates or timelines
+    * Before searching news or time-sensitive data
+    * When interpreting or presenting any temporal information (recent, upcoming, last year, etc.)
+    * Before making statements about when events occurred or will occur
+
+    Never assume the correct date.
+    If data only includes partial date information, use this tool to infer missing details.
 
     Returns
     -------
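A small invocation sketch for the parameterless tool (illustrative only):

from ask_candid.tools.general import get_current_day

# LangChain tools with no parameters can be invoked with an empty argument dict.
today = get_current_day.invoke({})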
ask_candid/tools/grants.py CHANGED
@@ -1,3 +1,4 @@
+from typing import TypedDict, Annotated
 import logging
 
 from langchain_core.tools import tool
@@ -11,13 +12,28 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.ERROR)
 
 
+class GrantRecord(TypedDict):
+    funder_id: Annotated[str, "Unique Candid ID value for the funder of the grant"]
+    funder_profile_link: Annotated[str, "Link to the Candid profile for the funder of the grant"]
+    funder_name: Annotated[str, "Name of the funder of the grant"]
+    recipient_id: Annotated[str, "Unique Candid ID value for the recipient of the grant"]
+    recipient_profile_link: Annotated[str, "Link to the Candid profile for the recipient of the grant"]
+    recipient_name: Annotated[str, "Name of the recipient of the grant"]
+    fiscal_year: Annotated[int | float, "Fiscal year that the grant was awarded"]
+    amount_usd: Annotated[int | float, "Dollar amount of the grant awarded in USD"]
+    description: Annotated[str, "Description of the purpose of the grant"]
+    working_on: Annotated[str, "Description of the subject purpose of the grant"]
+    serving: Annotated[str, "Description of the population groups served by the grant"]
+
+
 @tool
 def grants_search(
     query: str,
     subject_codes: str | None = None,
     populations_served_codes: str | None = None,
     geonameids_of_geographies_served: str | None = None
-) -> list[dict[str, str | int | float | None]] | str:
+# ) -> list[dict[str, str | int | float | None]] | str:
+) -> list[GrantRecord] | str:
     """Search for historical grants to find context about what is happening in the sector, and what organizations are
     involved with. This is intended for historical research purposes and contextualization. If trying to recommend
     funders then please use the dedicated funder recommendation tool instead of this. Funder recommendations uses grants
@@ -62,7 +78,7 @@ def grants_search(
 
     Returns
     -------
-    list[dict[str, str | int | float | None]] | str
+    list[GrantRecord] | str
         Array of relevant grants and information about the organizations involved
         If output is a string then that means there was some error, and retry should be considered
     """
@@ -97,17 +113,34 @@ def grants_search(
         elif facet == "population":
             serving.extend([code["name"].lower() for code in data["value"]])
 
-        output.append({
-            "funder_id": grant["grantmakerId"],
-            "funder_profile_link": format_candid_profile_link(grant["grantmakerId"]),
-            "funder_name": grant["grantmakerName"],
-            "recipient_id": grant["recipientId"],
-            "recipient_profile_link": format_candid_profile_link(grant["recipientId"]),
-            "recipient_name": grant["recipientName"],
-            "fiscal_year": grant["fiscalYear"],
-            "amound_usd": grant["amountUsd"],
-            "description": grant["text"],
-            "working_on": f"Working on {', '.join(working_on)}",
-            "serving": f"Serving population groups {', '.join(serving)}",
-        })
+        # output.append({
+        #     "funder_id": grant["grantmakerId"],
+        #     "funder_profile_link": format_candid_profile_link(grant["grantmakerId"]),
+        #     "funder_name": grant["grantmakerName"],
+        #     "recipient_id": grant["recipientId"],
+        #     "recipient_profile_link": format_candid_profile_link(grant["recipientId"]),
+        #     "recipient_name": grant["recipientName"],
+        #     "fiscal_year": grant["fiscalYear"],
+        #     "amound_usd": grant["amountUsd"],
+        #     "description": grant["text"],
+        #     "working_on": f"Working on {', '.join(working_on)}",
+        #     "serving": f"Serving population groups {', '.join(serving)}",
+        # })
+
+        output.append(GrantRecord(
+            funder_id=grant["grantmakerId"],
+            funder_profile_link=format_candid_profile_link(grant["grantmakerId"]),
+            funder_name=grant["grantmakerName"],
+            recipient_id=grant["recipientId"],
+            recipient_profile_link=format_candid_profile_link(grant["recipientId"]),
+            recipient_name=grant["recipientName"],
+            fiscal_year=grant["fiscalYear"],
+            amount_usd=grant["amountUsd"],
+            description=grant["text"],
+            working_on=f"Working on {', '.join(working_on)}",
+            serving=f"Serving population groups {', '.join(serving)}"
+        ))
+
+    if not output:
+        return "No grants were found, try a different search strategy."
     return output
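One benefit of the Annotated TypedDict is that the field descriptions are machine-readable; a short sketch of reading them back (illustrative):

from typing import get_type_hints
from ask_candid.tools.grants import GrantRecord

# Annotated metadata can be recovered at runtime, e.g. to surface field
# descriptions to the agent or to documentation tooling.
hints = get_type_hints(GrantRecord, include_extras=True)
print(hints["amount_usd"].__metadata__)  # ('Dollar amount of the grant awarded in USD',)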
ask_candid/tools/org_search.py CHANGED
@@ -20,12 +20,13 @@ class OrganizationNames(BaseModel):
 
 
 class OrganizationIdentifierArgs(BaseModel):
+    """Input arguments for the organization identifier tool"""
     text: str = Field(..., description="Chat model response text which contains named organizations.")
 
 
 class OrganizationIdentifier(BaseTool):
     llm: BaseChatModel
-    parser: type[PydanticOutputParser] = PydanticOutputParser(pydantic_object=OrganizationNames)
+    parser: PydanticOutputParser = PydanticOutputParser(pydantic_object=OrganizationNames)
     template: str = """Extract only the names of officially recognized organizations, foundations, and government
     entities from the text below. Do not include any entries that contain descriptions, regional identifiers, or
     explanations within parentheses or following the name. Strictly exclude databases, resources, crowdfunding
@@ -35,11 +36,19 @@ class OrganizationIdentifier(BaseTool):
     output format: ```{format_instructions}```
     """
 
-    name: str = "organization-identifier"
+    name: str = "organization_identifier"
     description: str = """
     Identify the names of nonprofits and foundations from chat model responses. If it is likely that a response contains
     proper names then it should be processed through this tool.
 
+    Some tools have outputs with organizations already identified by Candid's data. These include:
+    * grant search
+    * organization search
+    * funder recommendations
+    * RFP recommendations
+
+    If these tools are invoked then use the IDs and profile URLs provided in their outputs, and DO NOT use this tool.
+
     Examples
     --------
     >>> `organization_identifier('My Favorite Foundation awarded a grant to My Favorite Nonprofit.')`
@@ -55,12 +64,12 @@ class OrganizationIdentifier(BaseTool):
         )
         return RunnableSequence(prompt, self.llm, self.parser)
 
-    def _run(self, text: str) -> str:
+    def _run(self, text: str) -> list[str]:
         chain = self._build_pipeline()
         result: OrganizationNames = chain.invoke({"chatbot_output": text})
         return result.orgnames
 
-    async def _arun(self, text: str) -> str:
+    async def _arun(self, text: str) -> list[str]:
         chain = self._build_pipeline()
         result: OrganizationNames = await chain.ainvoke({"chatbot_output": text})
         return result.orgnames
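A hedged wiring sketch for the renamed tool, mirroring build_execution_graph in chat_v2.py; the ChatBedrock import path is an assumption since it is not shown in this diff:

import boto3
from langchain_aws import ChatBedrock  # assumed import; chat_v2.py's import is not in this diff
from ask_candid.base.config.models import BedrockEndpoints
from ask_candid.tools.org_search import OrganizationIdentifier

llm = ChatBedrock(
    client=boto3.client("bedrock-runtime", region_name="us-east-1"),
    model=BedrockEndpoints.claude_35_haiku.value
)
org_tool = OrganizationIdentifier(llm=llm)
# Returns a list[str] of organization names, per the updated _run signature.
names = org_tool.invoke({"text": "My Favorite Foundation awarded a grant to My Favorite Nonprofit."})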
ask_candid/tools/recommendations.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import TypedDict, Literal, Annotated, Any
 import logging
 
 from langchain_core.tools import tool
@@ -13,6 +13,24 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.ERROR)
 
 
+class OrganizationRecord(TypedDict):
+    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
+    name: Annotated[str, "Name of the organization"]
+    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
+    acronym: Annotated[str, "Acronym of the name of the organization"]
+    city: Annotated[str, "City that the organization is located in"]
+    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
+    country: Annotated[str, "Country that the organization is located in"]
+    ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
+    profile_link: Annotated[str, "Link to the Candid profile for the organization"]
+    working_on: Annotated[str, "Description of the subject purpose of the organization"]
+    serving: Annotated[str, "Description of the population groups served by the organization"]
+    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
+    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
+    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
+    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]
+
+
 @retry_on_status(num_retries=3)
 def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
     with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
@@ -25,7 +43,7 @@ def organization_search(
     located_postal_code: str | None = None,
     located_admin1: str | None = None,
     search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
-) -> list[dict[str, str | None]] | str:
+) -> list[OrganizationRecord] | str:
     """Search for organizations by name, description or work, program descriptions and locations. Here are some
     guidelines:
     * `query` controls hybrid searching involving both vector search and keyword search
@@ -54,7 +72,7 @@ def organization_search(
 
     Returns
     -------
-    list[dict[str, str]] | str
+    list[OrganizationRecord] | str
         List of the top organization search results
         If output is a string then that means there was some error, and retry should be considered
     """
@@ -90,23 +108,40 @@ def organization_search(
         elif code.startswith('S'):
             working_on.append(description.lower())
 
-        output.append({
-            "nonprofit_id": org["candidEntityID"],
-            "name": org["orgName"],
-            "aka_name": org["akaName"],
-            "acronym": org["acronymName"],
-            "city": org["city"],
-            "admin1": org["admin1"],
-            "country": org["countryName"],
-            "EIN": org["ein"],
-            "profile_link": format_candid_profile_link(org['candidEntityID']),
-            "working_on": f"Working on {', '.join(working_on)}",
-            "serving": f"Serving population groups {', '.join(serving)}",
-            "transparency_level": org["seal"].get("description"),
-            "organization_roles": ', '.join(org["roles"]),
-            "grants_awarded": ', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
-            "grants_received": ', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
-        })
+        # output.append({
+        #     "nonprofit_id": org["candidEntityID"],
+        #     "name": org["orgName"],
+        #     "aka_name": org["akaName"],
+        #     "acronym": org["acronymName"],
+        #     "city": org["city"],
+        #     "admin1": org["admin1"],
+        #     "country": org["countryName"],
+        #     "EIN": org["ein"],
+        #     "profile_link": format_candid_profile_link(org['candidEntityID']),
+        #     "working_on": f"Working on {', '.join(working_on)}",
+        #     "serving": f"Serving population groups {', '.join(serving)}",
+        #     "transparency_level": org["seal"].get("description"),
+        #     "organization_roles": ', '.join(org["roles"]),
+        #     "grants_awarded": ', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
+        #     "grants_received": ', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
+        # })
+        output.append(OrganizationRecord(
+            nonprofit_id=org["candidEntityID"],
+            name=org["orgName"],
+            aka_name=org["akaName"],
+            acronym=org["acronymName"],
+            city=org["city"],
+            admin1=org["admin1"],
+            country=org["countryName"],
+            ein=org["ein"],
+            profile_link=format_candid_profile_link(org['candidEntityID']),
+            working_on=f"Working on {', '.join(working_on)}",
+            serving=f"Serving population groups {', '.join(serving)}",
+            transparency_level=org["seal"].get("description"),
+            organization_roles=', '.join(org["roles"]),
+            grants_awarded=', '.join([f"{k}: {v}" for k, v in org["transactionsGiven"].items()]),
+            grants_received=', '.join([f"{k}: {v}" for k, v in org["transactionsReceived"].items()])
+        ))
     return output
 
 
@@ -201,7 +236,7 @@ def recommend_funders(
         data.get("meta") or {},
         [{
             **r,
-            "candid_profile_url": format_candid_profile_link(r['funder_id'])
+            "profile_link": format_candid_profile_link(r['funder_id'])
         } for r in (data.get("recommendations") or [])]
     )
 
@@ -282,6 +317,6 @@ def recommend_funding_opportunities(
         data.get("meta") or {},
         [{
             **r,
-            "candid_profile_url": format_candid_profile_link(r['funder_id'])
+            "profile_link": format_candid_profile_link(r['funder_id'])
         } for r in (data.get("recommendations") or [])]
     )
ask_candid/utils.py CHANGED
@@ -1,5 +1,4 @@
 from typing import Any
-from uuid import uuid4
 
 from langchain_core.documents import Document
 
@@ -51,13 +50,3 @@ def format_chat_ag_response(chatbot: list[Any]) -> list[Any]:
         chatbot.pop(-1)
         chatbot[-1]["content"] = chatbot[-1]["content"] + sources
     return chatbot
-
-
-def valid_inputs(*args) -> bool:
-    return any(a is not None or (isinstance(a, str) and a.strip() != '') for a in args)
-
-
-def get_session_id(thread_id: str | None) -> str:
-    if not thread_id:
-        thread_id = uuid4().hex
-    return thread_id
chat_v2.py CHANGED
@@ -14,7 +14,7 @@ from ask_candid.tools.search import search_candid_knowledge_base
 from ask_candid.tools.general import get_current_day
 from ask_candid.utils import html_format_docs_chat
 from ask_candid.base.config.constants import START_SYSTEM_PROMPT
-from ask_candid.base.config.models import Name2Endpoint
+from ask_candid.base.config.models import BedrockEndpoints
 from ask_candid.chat import convert_history_for_graph_agent, format_tool_call, format_tool_response
 
 try:
@@ -40,7 +40,7 @@ class LoggedComponents(TypedDict):
 def build_execution_graph() -> CompiledStateGraph:
     llm = ChatBedrock(
         client=boto3.client("bedrock-runtime", region_name="us-east-1"),
-        model=Name2Endpoint["claude-3.5-haiku"]
+        model=BedrockEndpoints.claude_35_haiku.value
     )
     org_name_recognition = OrganizationIdentifier(llm=llm)  # bind the main chat model to the tool
     return create_react_agent(
@@ -82,14 +82,14 @@ async def execute(
         if fname.endswith('.txt'):
             with open(fname, 'r', encoding='utf8') as f:
                 history.append(gr.ChatMessage(role="user", content=f.read()))
-    yield gr.MultimodalTextbox(value=None, interactive=True), history
 
     horizon = len(history)
-    inputs = {"messages": convert_history_for_graph_agent(history)}
+    history.append(gr.ChatMessage(role="assistant", content=""))
+    yield gr.MultimodalTextbox(value=None, interactive=True), history
 
+    inputs = {"messages": convert_history_for_graph_agent(history)}
     graph = build_execution_graph()
 
-    history.append(gr.ChatMessage(role="assistant", content=""))
     async for stream_mode, chunk in graph.astream(inputs, stream_mode=["messages", "tasks"]):
         if stream_mode == "messages" and chunk[0].content:
             for msg in chunk[0].content:
@@ -165,7 +165,7 @@ def build_chat_app():
             None,  # user
             BOT_LOGO,  # bot
         ),
-        height="50vh",
+        height="60vh",
         type="messages",
         show_label=False,
         show_copy_button=True,