Spaces:
Sleeping
Sleeping
| from typing import Any, Literal | |
| import httpx | |
| import traceback | |
| from mcp.server.fastmcp import FastMCP | |
# Initialize FastMCP server
mcp = FastMCP("arxiv-omar")

# Constants
# Base URLs of the three backing HTTP services (all hosted as Hugging Face Spaces).
CUSTOM_ARXIV_API_BASE = "https://om4r932-arxiv.hf.space"  # arXiv search + PDF text extraction
DDG_API_BASE = "https://ychkhan-ptt-endpoints.hf.space"  # DuckDuckGo-based PDF web search
API_3GPP_BASE = "https://organizedprogrammers-3gppdocfinder.hf.space"  # 3GPP document finder
| # Helpers | |
| async def make_request(url: str, data: dict = None) -> dict[str, Any] | None: | |
| if data is None: | |
| return None | |
| headers = { | |
| "Accept": "application/json" | |
| } | |
| async with httpx.AsyncClient(verify=False) as client: | |
| try: | |
| response = await client.post(url, headers=headers, json=data) | |
| print(response) | |
| response.raise_for_status() | |
| return response.json() | |
| except Exception as e: | |
| traceback.print_exception(e) | |
| return None | |
def format_search(pub_id: str, content: dict) -> str:
    """Render one arXiv search hit (id, title, authors, date, abstract, pdf) as text."""
    fields = [
        ("arXiv publication ID", pub_id),
        ("Title", content["title"]),
        ("Authors", content["authors"]),
        ("Release Date", content["date"]),
        ("Abstract", content["abstract"]),
        ("PDF link", content["pdf"]),
    ]
    body = "\n".join(f"{label} : {value}" for label, value in fields)
    return f"\n{body}\n"
def format_extract(message: dict) -> str:
    """Render an extracted-PDF payload (title + text) as a readable text block."""
    title = message.get("title", "No title has been found")
    text = message.get("text", "No text !")
    return f"\nTitle of PDF : {title}\nText : {text}\n"
def format_result_search(page: dict) -> str:
    """Render one web-search hit (title, snippet, pdf url) as a readable text block."""
    title = page.get("title", "No titles found !")
    description = page.get("body", "No description")
    pdf_url = page.get("url", None)
    return f"\nTitle : {title}\nLittle description : {description}\nPDF url : {pdf_url}\n"
def format_3gpp_doc_result(result: dict, release: int = None) -> str:
    """Render a 3GPP doc-finder hit (document id, release, URL) as a readable text block."""
    shown_release = "Not specified" if release is None else release
    doc_id = result.get("doc_id")
    url = result.get("url", "No URL found !")
    return f"\nDocument ID : {doc_id}\nRelease version : {shown_release}\nURL : {url}\n"
# Tools
# NOTE(review): no @mcp.tool() decorator is visible on these coroutines — confirm
# they are actually registered as MCP tools (the decorator may have been lost).
async def get_publications(keyword: str, limit: int = 15) -> str:
    """
    Get arXiv publications based on keywords and limit of documents

    Args:
        keyword: Keywords separated by spaces
        limit: Numbers of maximum publications returned (by default, 15)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/search"
    data = await make_request(url, data={'keyword': keyword, 'limit': limit})
    # Check for a missing response BEFORE subscripting it: the original code read
    # data["error"] first, which raised TypeError whenever make_request returned None.
    if not data:
        return "Unable to fetch publications"
    if data.get("error"):
        # On error the API puts a human-readable string in "message".
        return data["message"]
    if not data["message"]:
        return "No publications found"
    publications = [format_search(pub_id, content) for pub_id, content in data["message"].items()]
    return "\n--\n".join(publications)
async def web_pdf_search(query: str) -> str:
    """
    Search on the Web (with DuckDuckGo search engine) to get PDF documents based on the keywords

    Args:
        query: Keywords to search documents on the Web
    """
    url = f"{DDG_API_BASE}/search"
    data = await make_request(url, data={"query": query})
    if not data:
        return "Unable to fetch results"
    # .get guards against a response without a "results" key (the original
    # subscripted it directly and would raise KeyError on a malformed reply).
    results = data.get("results") or []
    if not results:
        return "No results found"
    return "\n--\n".join(format_result_search(result) for result in results)
async def get_3gpp_doc_url_byID(doc_id: str, release: int = None):
    """
    Get 3GPP Technical Document URL by their document ID.

    Args:
        doc_id: Document ID (i.e. C4-125411, SP-551242, 31.101)
        release : The release version of the document (by default, None)
    """
    endpoint = f"{API_3GPP_BASE}/find"
    payload = {"doc_id": doc_id, "release": release}
    response = await make_request(endpoint, data=payload)
    if not response:
        return "Unable to search document in 3GPP"
    return format_3gpp_doc_result(response, release)
async def get_pdf_text(pdf_url: str, limit_page: int = -1) -> str:
    """
    Extract the text from the URL pointing to a PDF file

    Args:
        pdf_url: URL to a PDF document
        limit_page: How many pages the user wants to extract the content (default: -1 for all pages)
    """
    url = f"{CUSTOM_ARXIV_API_BASE}/extract_pdf/url"
    # Separate name for the request body; the original reused `data` for both
    # the payload and the response, which obscured the flow.
    payload = {"url": pdf_url}
    if limit_page != -1:
        payload["page_num"] = limit_page
    data = await make_request(url, data=payload)
    # Check for a missing response BEFORE subscripting it: the original code read
    # data["error"] first, which raised TypeError whenever make_request returned None.
    if not data:
        return "Unable to extract PDF text"
    if data.get("error"):
        # On error the API puts a human-readable string in "message".
        return data["message"]
    if not data["message"]:
        return "No text can be extracted from this PDF"
    return format_extract(data["message"])
if __name__ == "__main__":
    # Serve the MCP tools over stdio; stdout/stdin carry the protocol,
    # so nothing else should print to stdout while running.
    mcp.run(transport="stdio")