Spaces:
Running
Running
| import os | |
| import re | |
| import requests | |
| import json | |
| from typing import Tuple, List, Optional | |
| from omegaconf import OmegaConf | |
| from pydantic import Field, BaseModel | |
| from vectara_agentic.agent import Agent | |
| from vectara_agentic.agent_config import AgentConfig | |
| from vectara_agentic.tools import ToolsFactory, VectaraToolFactory | |
| from vectara_agentic.tools_catalog import ToolsCatalog | |
| from vectara_agentic.types import ModelProvider, AgentType | |
| from dotenv import load_dotenv | |
| load_dotenv(override=True) | |
# Shared description of the `case_citation` argument; the tool methods below
# pass it to Field(description=...) so each tool describes the expected
# citation format (volume, reporter, first page) the same way.
citation_description = '''
The citation for a particular case.
Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.
'''
def extract_components_from_citation(citation: str) -> dict:
    """Split a case citation into its volume, reporter and first-page parts.

    A citation has the form ``<volume> <reporter> <first page>``, for example
    ``253 P.2d 136``. The reporter may span several words; the words are
    joined with hyphens, periods are removed and the result is lower-cased
    (``P.2d`` -> ``p2d``, ``F. Supp.`` -> ``f-supp``).

    Args:
        citation: The citation text to parse.

    Returns:
        A dict with the keys ``volume`` (int), ``reporter`` (str) and
        ``first_page`` (int), or an empty dict when the citation is malformed.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so "253  P.2d 136 " parses like "253 P.2d 136".
    parts = citation.split()

    # A valid citation needs a volume, at least one reporter word and a page.
    if len(parts) < 3:
        return {}

    volume_num, *reporter_words, first_page = parts

    # isdecimal() (unlike isdigit()) only accepts characters that int() can
    # convert, so inputs such as a superscript "2" are rejected instead of
    # raising ValueError below.
    if not volume_num.isdecimal() or not first_page.isdecimal():
        return {}

    reporter = '-'.join(reporter_words).replace('.', '').lower()
    # A reporter made only of periods would collapse to nothing useful.
    if not reporter.strip('-'):
        return {}

    return {'volume': int(volume_num), 'reporter': reporter, 'first_page': int(first_page)}
class AgentTools:
    """Tools the legal-assistant agent can call.

    Combines the Vectara RAG and search tools for the configured corpus with
    citation-based lookups against static.case.law and case.law.
    """

    # requests applies no timeout by default, so an unresponsive server would
    # otherwise block the agent indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 30

    # The dots and the literal "?" are escaped so that exactly the two URL
    # shapes produced by get_case_document_pdf / get_case_document_page match.
    # (Unescaped, "/?" means "optional slash" and the page URLs never matched.)
    _PDF_URL_PATTERN = re.compile(r'^https://static\.case\.law/.*')
    _PAGE_URL_PATTERN = re.compile(r'^https://case\.law/caselaw/\?reporter=.*')

    def __init__(self, _cfg, agent_config):
        """Store the configuration and create the tool factories.

        Args:
            _cfg: Configuration object; ``api_key`` and ``corpus_key`` are read from it.
            agent_config: Agent configuration, later passed to ``ToolsCatalog``.
        """
        self.tools_factory = ToolsFactory()
        self.agent_config = agent_config
        self.cfg = _cfg
        self.vec_factory = VectaraToolFactory(
            vectara_api_key=_cfg.api_key,
            vectara_corpus_key=_cfg.corpus_key,
        )

    def _fetch_case(self, citation_dict: dict) -> Optional[dict]:
        """Download the case record JSON for a parsed citation; None unless the server answers 200."""
        url = (
            f"https://static.case.law/{citation_dict['reporter']}/{citation_dict['volume']}"
            f"/cases/{citation_dict['first_page']:04d}-01.json"
        )
        response = requests.get(url, timeout=self._REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            return None
        return json.loads(response.text)

    def get_opinion_text(
        self,
        case_citation: str = Field(description = citation_description),
        summarize: bool = Field(default=True, description="if True returns case summary, otherwise the full text of the case")
    ) -> str:
        """
        Returns the full opinion/ruling text of the case, or the summary if summarize=True.
        If there is more than one opinion for the case, the type of each opinion is returned with the text,
        and the opinions (or their summaries) are separated by semicolons (;)

        Args:
            case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.
            summarize (bool): True to return just a summary of the case, False to return full case text.

        Returns:
            str: the full opinion/ruling text of the case, or the summary if summarize is True.
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        case = self._fetch_case(citation_dict)
        if case is None:
            return f"Case not found; please check the citation {case_citation}."

        if summarize:
            summarize_text = ToolsCatalog(self.agent_config).summarize_text

            def render(text: str) -> str:
                return summarize_text(text, "law")
        else:
            def render(text: str) -> str:
                return text

        opinions = case["casebody"]["opinions"]
        # A lone opinion is returned as plain text, without the type label.
        if len(opinions) == 1:
            return render(opinions[0]["text"])

        return "".join(
            f"Opinion type: {opinion['type']}, text: {render(opinion['text'])};"
            for opinion in opinions
        )

    def get_case_document_pdf(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> str:
        """
        Given a case citation, returns a valid web URL to a pdf of the case record

        Args:
            case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            str: a valid web URL to a pdf of the case record
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        case = self._fetch_case(citation_dict)
        if case is None:
            return f"Case not found; please check the citation {case_citation}."

        reporter = citation_dict['reporter']
        volume_num = citation_dict['volume']
        # The fragment points the viewer at the case's page within the volume PDF.
        page_number = case["first_page_order"]
        return f"https://static.case.law/{reporter}/{volume_num}.pdf#page={page_number}"

    def get_case_document_page(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> str:
        """
        Given a case citation, returns a valid web URL to a page with information about the case.

        Args:
            case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            str: a valid web URL to a page with information about the case
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        reporter = citation_dict['reporter']
        volume_num = citation_dict['volume']
        first_page = citation_dict['first_page']
        url = f"https://case.law/caselaw/?reporter={reporter}&volume={volume_num}&case={first_page:04d}-01"
        # Only hand the URL back if the page actually exists.
        response = requests.get(url, timeout=self._REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            return "Case not found; please check the citation."
        return url

    def get_case_name(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> Tuple[str, str]:
        """
        Given a case citation, returns its name and name abbreviation.

        Args:
            case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            Tuple[str, str]: the name and name abbreviation of the case
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}.", f"Citation is invalid: {case_citation}."

        case = self._fetch_case(citation_dict)
        if case is None:
            return "Case not found", "Case not found"
        return case["name"], case["name_abbreviation"]

    def get_cited_cases(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> List[dict]:
        """
        Given a case citation, returns a list of cases that are cited by the opinion of this case.

        Args:
            case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            A list of cases, each a dict with the citation, name and name_abbreviation of the case.
            If the citation is invalid or the case is not found, a one-element list with an error message.
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return [f"Citation is invalid: {case_citation}."]

        case = self._fetch_case(citation_dict)
        if case is None:
            # Wrapped in a list so both error paths return the same shape.
            return ["Case not found; please check the citation."]

        cited_cases = []
        # Each name lookup is a separate HTTP request, so only the first ten
        # cited cases are resolved.
        for citation in (case.get("cites_to") or [])[:10]:
            name, name_abbreviation = self.get_case_name(citation["cite"])
            cited_cases.append({
                "citation": citation["cite"],
                "name": name,
                "name_abbreviation": name_abbreviation
            })
        return cited_cases

    def validate_url(
        self,
        url: str = Field(description = "A web url pointing to case-law document")
    ) -> str:
        """
        Given a url, returns whether or not the url is valid.

        Args:
            url (str): A web url pointing to case-law document

        Returns:
            str: "URL is valid" if the url is valid, "URL is invalid" otherwise.
        """
        if AgentTools._PDF_URL_PATTERN.match(url) or AgentTools._PAGE_URL_PATTERN.match(url):
            return "URL is valid"
        return "URL is invalid"

    def get_tools(self):
        """Create and return every tool exposed to the agent.

        Returns:
            A list holding the ``ask_caselaw`` RAG tool, the ``search_caselaw``
            search tool, and the citation helper methods of this class wrapped
            by ``ToolsFactory.create_tool``.
        """
        class QueryCaselawArgs(BaseModel):
            citations: Optional[str] = Field(description = citation_description, default=None)

        summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

        ask_caselaw = self.vec_factory.create_rag_tool(
            tool_name = "ask_caselaw",
            tool_description = "A tool for asking questions about case law, and any legal issue in the state of Alaska.",
            tool_args_schema = QueryCaselawArgs,
            reranker = "chain", rerank_k = 100,
            rerank_chain = [
                {
                    "type": "slingshot",
                    "cutoff": 0.2
                },
                {
                    "type": "mmr",
                    "diversity_bias": 0.1
                },
                {
                    # Subtracts the decision's age (in years) from the scaled
                    # relevance score, floored at zero.
                    "type": "userfn",
                    "user_function": "max(1000 * get('$.score') - hours(seconds(to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.decision_date'), 'yyyy-MM-dd')))) / 24 / 365, 0)"
                }
            ],
            n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
            summary_num_results = 15,
            vectara_summarizer = summarizer,
            max_tokens = 4096,
            max_response_chars = 8192,
            include_citations = True,
            save_history = True,
        )

        search_caselaw = self.vec_factory.create_search_tool(
            tool_name = "search_caselaw",
            tool_description = "A tool for retrieving a list of relevant documents about case law in Alaska.",
            tool_args_schema = QueryCaselawArgs,
            reranker = "chain", rerank_k = 100,
            rerank_chain = [
                {
                    "type": "slingshot",
                    "cutoff": 0.2
                },
                {
                    "type": "mmr",
                    "diversity_bias": 0.1
                },
            ],
            n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
        )

        return (
            [ask_caselaw, search_caselaw] +
            [self.tools_factory.create_tool(tool) for tool in [
                self.get_opinion_text,
                self.get_case_document_pdf,
                self.get_case_document_page,
                self.get_cited_cases,
                self.get_case_name,
                self.validate_url
            ]]
        )
def get_agent_config() -> OmegaConf:
    """Assemble the demo configuration from environment variables.

    ``VECTARA_CORPUS_KEY`` and ``VECTARA_API_KEY`` are required (a missing one
    raises ``KeyError``); ``QUERY_EXAMPLES`` is optional and defaults to None.
    The demo name, welcome text and description are fixed.

    Returns:
        The configuration object produced by ``OmegaConf.create``.
    """
    env = os.environ
    settings = {}
    # Required credentials first, in the same order as they are looked up.
    settings['corpus_key'] = str(env['VECTARA_CORPUS_KEY'])
    settings['api_key'] = str(env['VECTARA_API_KEY'])
    settings['examples'] = env.get('QUERY_EXAMPLES')
    # Static texts for the demo.
    settings['demo_name'] = "legal-agent"
    settings['demo_welcome'] = "Welcome to the Legal Assistant demo."
    settings['demo_description'] = "This demo can help you prepare for a court case by providing you information about past court cases in Alaska."
    return OmegaConf.create(settings)
def initialize_agent(_cfg, agent_progress_callback=None):
    """Build the legal-assistant agent.

    Creates a primary and a fallback ``AgentConfig`` from ``VECTARA_AGENTIC_*``
    environment variables (OpenAI defaults when unset), constructs the
    ``Agent`` with the tools from ``AgentTools`` and calls
    ``agent.report(detailed=True)`` before returning it.

    Args:
        _cfg: Configuration forwarded to ``AgentTools``.
        agent_progress_callback: Optional callback handed to ``Agent``.

    Returns:
        The constructed ``Agent``.
    """
    # NOTE(review): the instructions below mention a 'critique_as_judge' tool,
    # but AgentTools.get_tools() in this file does not register a tool with
    # that name -- confirm it is supplied elsewhere.
    legal_assistant_instructions = """
- You are a helpful legal assistant, with expertise in case law in the state of Alaska.
- Always use the 'ask_caselaw' tool first, as your primary tool for answering questions. Never use your own knowledge.
- The references returned by the 'ask_caselaw' tool include metadata relevant to its response, such as case citations, dates, or names.
- Use the 'search_caselaw' tool to search for documents related to case law in Alaska, and set summarize=True to get a summary of those documents.
- when including a case citation in your response you can call 'get_case_document_pdf' or 'get_case_document_page' with the case citation
to obtain a valid web URL to a page with information about the case.
- Never use your internal knowledge to guess a URL or link. Only use URLs provided by tools and validated by the 'validate_url' tool.
- Never use your internal knowledge to guess a case citation. Only use citation information provided by a tool or by the user.
- A Case Citation includes 3 components: volume number, reporter, and first page.
Here are some examples: '253 P.2d 136', '10 Alaska 11', '6 C.M.A. 3'
- If two cases have conflicting rulings, assume that the case with the more current ruling date is correct.
- If the response is based on cases that are older than 5 years, make sure to inform the user that the information may be outdated,
since some case opinions may no longer apply in law.
- If a user wants to test their argument, use the 'ask_caselaw' tool to gather information about cases related to their argument
and the 'critique_as_judge' tool to determine whether their argument is sound or has issues that must be corrected.
- Never discuss politics, and always respond politely.
"""

    env = os.getenv
    default_agent_type = AgentType.OPENAI.value
    default_provider = ModelProvider.OPENAI.value
    # Primary and fallback configurations read the same observer variable.
    observer_type = env("VECTARA_AGENTIC_OBSERVER_TYPE", "NO_OBSERVER")

    primary_settings = {
        "agent_type": env("VECTARA_AGENTIC_AGENT_TYPE", default_agent_type),
        "main_llm_provider": env("VECTARA_AGENTIC_MAIN_LLM_PROVIDER", default_provider),
        "main_llm_model_name": env("VECTARA_AGENTIC_MAIN_MODEL_NAME", ""),
        "tool_llm_provider": env("VECTARA_AGENTIC_TOOL_LLM_PROVIDER", default_provider),
        "tool_llm_model_name": env("VECTARA_AGENTIC_TOOL_MODEL_NAME", ""),
        "observer": observer_type,
    }
    fallback_settings = {
        "agent_type": env("VECTARA_AGENTIC_FALLBACK_AGENT_TYPE", default_agent_type),
        "main_llm_provider": env("VECTARA_AGENTIC_FALLBACK_MAIN_LLM_PROVIDER", default_provider),
        "main_llm_model_name": env("VECTARA_AGENTIC_FALLBACK_MAIN_MODEL_NAME", ""),
        "tool_llm_provider": env("VECTARA_AGENTIC_FALLBACK_TOOL_LLM_PROVIDER", default_provider),
        "tool_llm_model_name": env("VECTARA_AGENTIC_FALLBACK_TOOL_MODEL_NAME", ""),
        "observer": observer_type,
    }
    agent_config = AgentConfig(**primary_settings)
    fallback_agent_config = AgentConfig(**fallback_settings)

    tools = AgentTools(_cfg, agent_config).get_tools()
    agent = Agent(
        tools=tools,
        topic="Case law in Alaska",
        custom_instructions=legal_assistant_instructions,
        agent_progress_callback=agent_progress_callback,
        agent_config=agent_config,
        fallback_agent_config=fallback_agent_config,
    )
    agent.report(detailed=True)
    return agent