Spaces:
Sleeping
Sleeping
| from typing import Any, Literal | |
| import httpx | |
| import traceback | |
| from mcp.server.fastmcp import FastMCP | |
# Initialize FastMCP server
mcp = FastMCP("arxiv-omar")

# Constants
# Base URLs of the three backing HTTP services (all hosted as Hugging Face Spaces).
CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"  # arXiv search + PDF text extraction
DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"  # DuckDuckGo-based PDF web search
API_3GPP_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"  # 3GPP document finder
| # Helpers | |
| async def make_request(url: str, data: dict = None) -> dict[str, Any] | None: | |
| if data is None: | |
| return None | |
| headers = { | |
| "Accept": "application/json" | |
| } | |
| async with httpx.AsyncClient(verify=False) as client: | |
| try: | |
| response = await client.post(url, headers=headers, json=data) | |
| print(response) | |
| response.raise_for_status() | |
| return response.json() | |
| except Exception as e: | |
| traceback.print_exception(e) | |
| return None | |
def format_search(pub_id: str, content: dict) -> str:
    """Render one arXiv search hit (id, title, authors, date, abstract, pdf) as text."""
    fields = [
        ("arXiv publication ID", pub_id),
        ("Title", content["title"]),
        ("Authors", content["authors"]),
        ("Release Date", content["date"]),
        ("Abstract", content["abstract"]),
        ("PDF link", content["pdf"]),
    ]
    body = "\n".join(f"{label} : {value}" for label, value in fields)
    return f"\n{body}\n"
def format_extract(message: dict) -> str:
    """Render an extracted-PDF payload (title + text) as a readable text block."""
    title = message.get("title", "No title has been found")
    text = message.get("text", "No text !")
    return f"\nTitle of PDF : {title}\nText : {text}\n"
def format_result_search(page: dict) -> str:
    """Render one web-search hit (title, snippet, pdf url) as a readable text block."""
    title = page.get("title", "No titles found !")
    description = page.get("body", "No description")
    pdf_url = page.get("url", None)
    return f"\nTitle : {title}\nLittle description : {description}\nPDF url : {pdf_url}\n"
def format_3gpp_doc_result(result: dict, release: int = None) -> str:
    """Render a 3GPP doc-finder hit (document id, release, URL) as a readable text block."""
    shown_release = "Not specified" if release is None else release
    doc_id = result.get("doc_id")
    url = result.get("url", "No URL found !")
    return f"\nDocument ID : {doc_id}\nRelease version : {shown_release}\nURL : {url}\n"
# Tools
# NOTE(review): no @mcp.tool() decorator is visible on these coroutines — confirm
# they are actually registered as MCP tools (the decorator may have been lost).
async def get_publications(keyword: str, limit: int = 15) -> str:
    """
    Get arXiv publications based on keywords and limit of documents

    Args:
        keyword: Keywords separated by spaces
        limit: Numbers of maximum publications returned (by default, 15)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/search"
    data = await make_request(url, data={'keyword': keyword, 'limit': limit})
    # Check for a missing response BEFORE subscripting it: the original code read
    # data["error"] first, which raised TypeError whenever make_request returned None.
    if not data:
        return "Unable to fetch publications"
    if data.get("error"):
        # On error the API puts a human-readable string in "message".
        return data["message"]
    if not data["message"]:
        return "No publications found"
    publications = [format_search(pub_id, content) for pub_id, content in data["message"].items()]
    return "\n--\n".join(publications)
async def web_pdf_search(query: str) -> str:
    """
    Search on the Web (with DuckDuckGo search engine) to get PDF documents based on the keywords

    Args:
        query: Keywords to search documents on the Web
    """
    url = f"{DDG_API_BASE}/search"
    data = await make_request(url, data={"query": query})
    if not data:
        return "Unable to fetch results"
    # .get guards against a response without a "results" key (the original
    # subscripted it directly and would raise KeyError on a malformed reply).
    results = data.get("results") or []
    if not results:
        return "No results found"
    return "\n--\n".join(format_result_search(result) for result in results)
async def get_3gpp_doc_url_byID(doc_id: str, release: int = None):
    """
    Get 3GPP Technical Document URL by their document ID.

    Args:
        doc_id: Document ID (i.e. C4-125411, SP-551242, 31.101)
        release : The release version of the document (by default, None)
    """
    endpoint = f"{API_3GPP_BASE}/find"
    payload = {"doc_id": doc_id, "release": release}
    response = await make_request(endpoint, data=payload)
    if not response:
        return "Unable to search document in 3GPP"
    return format_3gpp_doc_result(response, release)
async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
    """
    Extract the text from the URL pointing to a PDF file

    Args:
        pdf_url: URL to a PDF document
        limit_page: How many pages the user wants to extract the content (default: -1 for all pages)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"
    # Separate name for the request body; the original reused `data` for both
    # the payload and the response, which obscured the flow.
    payload = {"url": pdf_url}
    if limit_page != -1:
        payload["page_num"] = limit_page
    data = await make_request(url, data=payload)
    # Check for a missing response BEFORE subscripting it: the original code read
    # data["error"] first, which raised TypeError whenever make_request returned None.
    if not data:
        return "Unable to extract PDF text"
    if data.get("error"):
        # On error the API puts a human-readable string in "message".
        return data["message"]
    if not data["message"]:
        return "No text can be extracted from this PDF"
    return format_extract(data["message"])
if __name__ == "__main__":
    # Serve the MCP tools over stdio; stdout/stdin carry the protocol,
    # so nothing else should print to stdout while running.
    mcp.run(transport="stdio")