Refactor search_web function for improved readability: format parameters, enhance example usage, and streamline payload preparation.
Browse files
app.py
CHANGED
|
@@ -39,7 +39,9 @@ limiter = MovingWindowRateLimiter(storage)
|
|
| 39 |
rate_limit = parse("200/hour")
|
| 40 |
|
| 41 |
|
| 42 |
-
async def search_web(
|
|
|
|
|
|
|
| 43 |
"""
|
| 44 |
Search the web for information or fresh news, returning extracted content.
|
| 45 |
|
|
@@ -74,7 +76,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 74 |
Returns error message if API key is missing or search fails.
|
| 75 |
|
| 76 |
Examples:
|
| 77 |
-
- search_web("OpenAI GPT-5", "news"
|
| 78 |
- search_web("python tutorial", "search") - Get 4 general results about Python (default count)
|
| 79 |
- search_web("stock market today", "news", 10) - Get 10 news articles about today's market
|
| 80 |
- search_web("machine learning basics") - Get 4 general search results (all defaults)
|
|
@@ -86,7 +88,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 86 |
if num_results is None:
|
| 87 |
num_results = 4
|
| 88 |
num_results = max(1, min(20, num_results))
|
| 89 |
-
|
| 90 |
# Validate search_type
|
| 91 |
if search_type not in ["search", "news"]:
|
| 92 |
search_type = "search"
|
|
@@ -97,14 +99,16 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 97 |
return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."
|
| 98 |
|
| 99 |
# Select endpoint based on search type
|
| 100 |
-
endpoint =
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
# Prepare payload
|
| 103 |
payload = {"q": query, "num": num_results}
|
| 104 |
if search_type == "news":
|
| 105 |
payload["type"] = "news"
|
| 106 |
payload["page"] = 1
|
| 107 |
-
|
| 108 |
async with httpx.AsyncClient(timeout=15) as client:
|
| 109 |
resp = await client.post(endpoint, headers=HEADERS, json=payload)
|
| 110 |
|
|
@@ -116,11 +120,9 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 116 |
results = resp.json().get("news", [])
|
| 117 |
else:
|
| 118 |
results = resp.json().get("organic", [])
|
| 119 |
-
|
| 120 |
if not results:
|
| 121 |
-
return
|
| 122 |
-
f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
|
| 123 |
-
)
|
| 124 |
|
| 125 |
# Fetch HTML content concurrently
|
| 126 |
urls = [r["link"] for r in results]
|
|
@@ -152,12 +154,14 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 152 |
try:
|
| 153 |
date_str = meta.get("date", "")
|
| 154 |
if date_str:
|
| 155 |
-
date_iso = dateparser.parse(date_str, fuzzy=True).strftime(
|
|
|
|
|
|
|
| 156 |
else:
|
| 157 |
date_iso = "Unknown"
|
| 158 |
except Exception:
|
| 159 |
date_iso = "Unknown"
|
| 160 |
-
|
| 161 |
chunk = (
|
| 162 |
f"## {meta['title']}\n"
|
| 163 |
f"**Source:** {meta.get('source', 'Unknown')} "
|
|
@@ -167,15 +171,15 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
| 167 |
)
|
| 168 |
else:
|
| 169 |
# Search results don't have date/source but have domain
|
| 170 |
-
domain = meta[
|
| 171 |
-
|
| 172 |
chunk = (
|
| 173 |
f"## {meta['title']}\n"
|
| 174 |
f"**Domain:** {domain}\n"
|
| 175 |
f"**URL:** {meta['link']}\n\n"
|
| 176 |
f"{body.strip()}\n"
|
| 177 |
)
|
| 178 |
-
|
| 179 |
chunks.append(chunk)
|
| 180 |
|
| 181 |
if not chunks:
|
|
@@ -222,7 +226,7 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
|
|
| 222 |
label="Search Type",
|
| 223 |
info="Choose search type",
|
| 224 |
)
|
| 225 |
-
|
| 226 |
with gr.Row():
|
| 227 |
num_results_input = gr.Slider(
|
| 228 |
minimum=1,
|
|
@@ -259,7 +263,9 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
|
|
| 259 |
)
|
| 260 |
|
| 261 |
search_button.click(
|
| 262 |
-
fn=search_web,
|
|
|
|
|
|
|
| 263 |
)
|
| 264 |
|
| 265 |
|
|
|
|
| 39 |
rate_limit = parse("200/hour")
|
| 40 |
|
| 41 |
|
| 42 |
+
async def search_web(
|
| 43 |
+
query: str, search_type: str = "search", num_results: Optional[int] = 4
|
| 44 |
+
) -> str:
|
| 45 |
"""
|
| 46 |
Search the web for information or fresh news, returning extracted content.
|
| 47 |
|
|
|
|
| 76 |
Returns error message if API key is missing or search fails.
|
| 77 |
|
| 78 |
Examples:
|
| 79 |
+
- search_web("OpenAI GPT-5", "news") - Get 4 fresh news articles about OpenAI (default count)
|
| 80 |
- search_web("python tutorial", "search") - Get 4 general results about Python (default count)
|
| 81 |
- search_web("stock market today", "news", 10) - Get 10 news articles about today's market
|
| 82 |
- search_web("machine learning basics") - Get 4 general search results (all defaults)
|
|
|
|
| 88 |
if num_results is None:
|
| 89 |
num_results = 4
|
| 90 |
num_results = max(1, min(20, num_results))
|
| 91 |
+
|
| 92 |
# Validate search_type
|
| 93 |
if search_type not in ["search", "news"]:
|
| 94 |
search_type = "search"
|
|
|
|
| 99 |
return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."
|
| 100 |
|
| 101 |
# Select endpoint based on search type
|
| 102 |
+
endpoint = (
|
| 103 |
+
SERPER_NEWS_ENDPOINT if search_type == "news" else SERPER_SEARCH_ENDPOINT
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
# Prepare payload
|
| 107 |
payload = {"q": query, "num": num_results}
|
| 108 |
if search_type == "news":
|
| 109 |
payload["type"] = "news"
|
| 110 |
payload["page"] = 1
|
| 111 |
+
|
| 112 |
async with httpx.AsyncClient(timeout=15) as client:
|
| 113 |
resp = await client.post(endpoint, headers=HEADERS, json=payload)
|
| 114 |
|
|
|
|
| 120 |
results = resp.json().get("news", [])
|
| 121 |
else:
|
| 122 |
results = resp.json().get("organic", [])
|
| 123 |
+
|
| 124 |
if not results:
|
| 125 |
+
return f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
|
|
|
|
|
|
|
| 126 |
|
| 127 |
# Fetch HTML content concurrently
|
| 128 |
urls = [r["link"] for r in results]
|
|
|
|
| 154 |
try:
|
| 155 |
date_str = meta.get("date", "")
|
| 156 |
if date_str:
|
| 157 |
+
date_iso = dateparser.parse(date_str, fuzzy=True).strftime(
|
| 158 |
+
"%Y-%m-%d"
|
| 159 |
+
)
|
| 160 |
else:
|
| 161 |
date_iso = "Unknown"
|
| 162 |
except Exception:
|
| 163 |
date_iso = "Unknown"
|
| 164 |
+
|
| 165 |
chunk = (
|
| 166 |
f"## {meta['title']}\n"
|
| 167 |
f"**Source:** {meta.get('source', 'Unknown')} "
|
|
|
|
| 171 |
)
|
| 172 |
else:
|
| 173 |
# Search results don't have date/source but have domain
|
| 174 |
+
domain = meta["link"].split("/")[2].replace("www.", "")
|
| 175 |
+
|
| 176 |
chunk = (
|
| 177 |
f"## {meta['title']}\n"
|
| 178 |
f"**Domain:** {domain}\n"
|
| 179 |
f"**URL:** {meta['link']}\n\n"
|
| 180 |
f"{body.strip()}\n"
|
| 181 |
)
|
| 182 |
+
|
| 183 |
chunks.append(chunk)
|
| 184 |
|
| 185 |
if not chunks:
|
|
|
|
| 226 |
label="Search Type",
|
| 227 |
info="Choose search type",
|
| 228 |
)
|
| 229 |
+
|
| 230 |
with gr.Row():
|
| 231 |
num_results_input = gr.Slider(
|
| 232 |
minimum=1,
|
|
|
|
| 263 |
)
|
| 264 |
|
| 265 |
search_button.click(
|
| 266 |
+
fn=search_web,
|
| 267 |
+
inputs=[query_input, search_type_input, num_results_input],
|
| 268 |
+
outputs=output,
|
| 269 |
)
|
| 270 |
|
| 271 |
|