Spaces:
Sleeping
Sleeping
change
Browse files
app.py
CHANGED
|
@@ -21,6 +21,13 @@ logging.basicConfig(
|
|
| 21 |
format="%(asctime)s %(levelname)s %(name)s - %(message)s",
|
| 22 |
)
|
| 23 |
logger = logging.getLogger("linkedin_mcp")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
mcp = FastMCP(
|
|
@@ -66,6 +73,31 @@ def _ensure_absolute_url(href: str) -> str:
|
|
| 66 |
return f"https://www.linkedin.com/{href}"
|
| 67 |
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
|
| 70 |
try:
|
| 71 |
from selectolax.parser import HTMLParser
|
|
@@ -287,7 +319,15 @@ def _search_page(
|
|
| 287 |
logger.debug("GET main page: %s", base_url)
|
| 288 |
resp = client.get(base_url, follow_redirects=True, timeout=20.0)
|
| 289 |
resp.raise_for_status()
|
| 290 |
-
logger.debug(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
jobs = _parse_jobs_from_html(resp.text)
|
| 292 |
logger.debug("Parsed %d jobs from main page", len(jobs))
|
| 293 |
|
|
@@ -298,17 +338,31 @@ def _search_page(
|
|
| 298 |
)
|
| 299 |
logger.debug("GET fragment fallback: %s", fragment_url)
|
| 300 |
frag_resp = client.get(fragment_url, follow_redirects=True, timeout=20.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
if frag_resp.status_code == 200:
|
| 302 |
-
|
|
|
|
|
|
|
| 303 |
jobs = _parse_jobs_from_html(frag_resp.text)
|
| 304 |
logger.debug("Parsed %d jobs from fragment", len(jobs))
|
| 305 |
else:
|
| 306 |
logger.debug("Fragment request returned status=%d", frag_resp.status_code)
|
| 307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
return jobs
|
| 309 |
|
| 310 |
|
| 311 |
-
@mcp.tool(description="Search LinkedIn job listings and return structured job postings.")
|
| 312 |
def search_linkedin_jobs(
|
| 313 |
query: str,
|
| 314 |
location: Optional[str] = None,
|
|
@@ -408,6 +462,10 @@ def search_linkedin_jobs(
|
|
| 408 |
return all_jobs[:max_items]
|
| 409 |
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
if __name__ == "__main__":
|
| 412 |
-
logger.info("Starting linkedin-jobs MCP server")
|
| 413 |
-
mcp.run(transport="http")
|
|
|
|
| 21 |
format="%(asctime)s %(levelname)s %(name)s - %(message)s",
|
| 22 |
)
|
| 23 |
logger = logging.getLogger("linkedin_mcp")
|
| 24 |
+
logger.setLevel(_numeric_level)
|
| 25 |
+
if not logger.handlers:
|
| 26 |
+
_handler = logging.StreamHandler()
|
| 27 |
+
_handler.setLevel(_numeric_level)
|
| 28 |
+
_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s"))
|
| 29 |
+
logger.addHandler(_handler)
|
| 30 |
+
logger.propagate = False
|
| 31 |
|
| 32 |
|
| 33 |
mcp = FastMCP(
|
|
|
|
| 73 |
return f"https://www.linkedin.com/{href}"
|
| 74 |
|
| 75 |
|
| 76 |
+
def _detect_block_or_wall(text: str) -> Optional[str]:
|
| 77 |
+
lowered = text.lower()
|
| 78 |
+
hints = [
|
| 79 |
+
"captcha",
|
| 80 |
+
"are you a robot",
|
| 81 |
+
"robot check",
|
| 82 |
+
"unusual activity",
|
| 83 |
+
"sign in",
|
| 84 |
+
"signin",
|
| 85 |
+
"log in",
|
| 86 |
+
"please sign in",
|
| 87 |
+
"you’re seeing this message because",
|
| 88 |
+
"to view this page, you must",
|
| 89 |
+
]
|
| 90 |
+
for hint in hints:
|
| 91 |
+
if hint in lowered:
|
| 92 |
+
return hint
|
| 93 |
+
return None
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _summarize_body(text: str, limit: int = 300) -> str:
|
| 97 |
+
collapsed = re.sub(r"\s+", " ", text).strip()
|
| 98 |
+
return collapsed[:limit] + ("…" if len(collapsed) > limit else "")
|
| 99 |
+
|
| 100 |
+
|
| 101 |
def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
|
| 102 |
try:
|
| 103 |
from selectolax.parser import HTMLParser
|
|
|
|
| 319 |
logger.debug("GET main page: %s", base_url)
|
| 320 |
resp = client.get(base_url, follow_redirects=True, timeout=20.0)
|
| 321 |
resp.raise_for_status()
|
| 322 |
+
logger.debug(
|
| 323 |
+
"Main page status=%d bytes=%d content-type=%s",
|
| 324 |
+
resp.status_code,
|
| 325 |
+
len(resp.content),
|
| 326 |
+
resp.headers.get("content-type"),
|
| 327 |
+
)
|
| 328 |
+
block_hint = _detect_block_or_wall(resp.text)
|
| 329 |
+
if block_hint:
|
| 330 |
+
logger.warning("Main page may be blocked/walled (hint=%r)", block_hint)
|
| 331 |
jobs = _parse_jobs_from_html(resp.text)
|
| 332 |
logger.debug("Parsed %d jobs from main page", len(jobs))
|
| 333 |
|
|
|
|
| 338 |
)
|
| 339 |
logger.debug("GET fragment fallback: %s", fragment_url)
|
| 340 |
frag_resp = client.get(fragment_url, follow_redirects=True, timeout=20.0)
|
| 341 |
+
logger.debug(
|
| 342 |
+
"Fragment status=%d bytes=%d content-type=%s",
|
| 343 |
+
frag_resp.status_code,
|
| 344 |
+
len(frag_resp.content),
|
| 345 |
+
frag_resp.headers.get("content-type"),
|
| 346 |
+
)
|
| 347 |
if frag_resp.status_code == 200:
|
| 348 |
+
block_hint = _detect_block_or_wall(frag_resp.text)
|
| 349 |
+
if block_hint:
|
| 350 |
+
logger.warning("Fragment page may be blocked/walled (hint=%r)", block_hint)
|
| 351 |
jobs = _parse_jobs_from_html(frag_resp.text)
|
| 352 |
logger.debug("Parsed %d jobs from fragment", len(jobs))
|
| 353 |
else:
|
| 354 |
logger.debug("Fragment request returned status=%d", frag_resp.status_code)
|
| 355 |
|
| 356 |
+
if len(jobs) == 0:
|
| 357 |
+
logger.info(
|
| 358 |
+
"Zero jobs after main+fragment. Body sample: %s",
|
| 359 |
+
_summarize_body(resp.text or frag_resp.text or ""),
|
| 360 |
+
)
|
| 361 |
+
|
| 362 |
return jobs
|
| 363 |
|
| 364 |
|
| 365 |
+
@mcp.tool(name="Linkedin_demo_search_linkedin_jobs", description="Search LinkedIn job listings and return structured job postings.")
|
| 366 |
def search_linkedin_jobs(
|
| 367 |
query: str,
|
| 368 |
location: Optional[str] = None,
|
|
|
|
| 462 |
return all_jobs[:max_items]
|
| 463 |
|
| 464 |
|
| 465 |
+
# Log tool registration explicitly for visibility in managed environments
|
| 466 |
+
logger.info("Tool registered: Linkedin_demo_search_linkedin_jobs")
|
| 467 |
+
|
| 468 |
+
|
| 469 |
if __name__ == "__main__":
|
| 470 |
+
logger.info("Starting linkedin-jobs MCP server (streamable-http) on %s:%s", "0.0.0.0", 7860)
|
| 471 |
+
mcp.run(transport="streamable-http")
|