Jofthomas committed on
Commit
73d8b6c
·
1 Parent(s): 0d299fc
Files changed (1) hide show
  1. app.py +63 -5
app.py CHANGED
@@ -21,6 +21,13 @@ logging.basicConfig(
21
  format="%(asctime)s %(levelname)s %(name)s - %(message)s",
22
  )
23
  logger = logging.getLogger("linkedin_mcp")
 
 
 
 
 
 
 
24
 
25
 
26
  mcp = FastMCP(
@@ -66,6 +73,31 @@ def _ensure_absolute_url(href: str) -> str:
66
  return f"https://www.linkedin.com/{href}"
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
70
  try:
71
  from selectolax.parser import HTMLParser
@@ -287,7 +319,15 @@ def _search_page(
287
  logger.debug("GET main page: %s", base_url)
288
  resp = client.get(base_url, follow_redirects=True, timeout=20.0)
289
  resp.raise_for_status()
290
- logger.debug("Main page status=%d bytes=%d", resp.status_code, len(resp.content))
 
 
 
 
 
 
 
 
291
  jobs = _parse_jobs_from_html(resp.text)
292
  logger.debug("Parsed %d jobs from main page", len(jobs))
293
 
@@ -298,17 +338,31 @@ def _search_page(
298
  )
299
  logger.debug("GET fragment fallback: %s", fragment_url)
300
  frag_resp = client.get(fragment_url, follow_redirects=True, timeout=20.0)
 
 
 
 
 
 
301
  if frag_resp.status_code == 200:
302
- logger.debug("Fragment status=%d bytes=%d", frag_resp.status_code, len(frag_resp.content))
 
 
303
  jobs = _parse_jobs_from_html(frag_resp.text)
304
  logger.debug("Parsed %d jobs from fragment", len(jobs))
305
  else:
306
  logger.debug("Fragment request returned status=%d", frag_resp.status_code)
307
 
 
 
 
 
 
 
308
  return jobs
309
 
310
 
311
- @mcp.tool(description="Search LinkedIn job listings and return structured job postings.")
312
  def search_linkedin_jobs(
313
  query: str,
314
  location: Optional[str] = None,
@@ -408,6 +462,10 @@ def search_linkedin_jobs(
408
  return all_jobs[:max_items]
409
 
410
 
 
 
 
 
411
  if __name__ == "__main__":
412
- logger.info("Starting linkedin-jobs MCP server")
413
- mcp.run(transport="http")
 
21
  format="%(asctime)s %(levelname)s %(name)s - %(message)s",
22
  )
23
  logger = logging.getLogger("linkedin_mcp")
24
+ logger.setLevel(_numeric_level)
25
+ if not logger.handlers:
26
+ _handler = logging.StreamHandler()
27
+ _handler.setLevel(_numeric_level)
28
+ _handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s"))
29
+ logger.addHandler(_handler)
30
+ logger.propagate = False
31
 
32
 
33
  mcp = FastMCP(
 
73
  return f"https://www.linkedin.com/{href}"
74
 
75
 
76
+ def _detect_block_or_wall(text: str) -> Optional[str]:
77
+ lowered = text.lower()
78
+ hints = [
79
+ "captcha",
80
+ "are you a robot",
81
+ "robot check",
82
+ "unusual activity",
83
+ "sign in",
84
+ "signin",
85
+ "log in",
86
+ "please sign in",
87
+ "you’re seeing this message because",
88
+ "to view this page, you must",
89
+ ]
90
+ for hint in hints:
91
+ if hint in lowered:
92
+ return hint
93
+ return None
94
+
95
+
96
+ def _summarize_body(text: str, limit: int = 300) -> str:
97
+ collapsed = re.sub(r"\s+", " ", text).strip()
98
+ return collapsed[:limit] + ("…" if len(collapsed) > limit else "")
99
+
100
+
101
  def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
102
  try:
103
  from selectolax.parser import HTMLParser
 
319
  logger.debug("GET main page: %s", base_url)
320
  resp = client.get(base_url, follow_redirects=True, timeout=20.0)
321
  resp.raise_for_status()
322
+ logger.debug(
323
+ "Main page status=%d bytes=%d content-type=%s",
324
+ resp.status_code,
325
+ len(resp.content),
326
+ resp.headers.get("content-type"),
327
+ )
328
+ block_hint = _detect_block_or_wall(resp.text)
329
+ if block_hint:
330
+ logger.warning("Main page may be blocked/walled (hint=%r)", block_hint)
331
  jobs = _parse_jobs_from_html(resp.text)
332
  logger.debug("Parsed %d jobs from main page", len(jobs))
333
 
 
338
  )
339
  logger.debug("GET fragment fallback: %s", fragment_url)
340
  frag_resp = client.get(fragment_url, follow_redirects=True, timeout=20.0)
341
+ logger.debug(
342
+ "Fragment status=%d bytes=%d content-type=%s",
343
+ frag_resp.status_code,
344
+ len(frag_resp.content),
345
+ frag_resp.headers.get("content-type"),
346
+ )
347
  if frag_resp.status_code == 200:
348
+ block_hint = _detect_block_or_wall(frag_resp.text)
349
+ if block_hint:
350
+ logger.warning("Fragment page may be blocked/walled (hint=%r)", block_hint)
351
  jobs = _parse_jobs_from_html(frag_resp.text)
352
  logger.debug("Parsed %d jobs from fragment", len(jobs))
353
  else:
354
  logger.debug("Fragment request returned status=%d", frag_resp.status_code)
355
 
356
+ if len(jobs) == 0:
357
+ logger.info(
358
+ "Zero jobs after main+fragment. Body sample: %s",
359
+ _summarize_body(resp.text or frag_resp.text or ""),
360
+ )
361
+
362
  return jobs
363
 
364
 
365
+ @mcp.tool(name="Linkedin_demo_search_linkedin_jobs", description="Search LinkedIn job listings and return structured job postings.")
366
  def search_linkedin_jobs(
367
  query: str,
368
  location: Optional[str] = None,
 
462
  return all_jobs[:max_items]
463
 
464
 
465
# Log tool registration explicitly for visibility in managed environments
# NOTE(review): the tool name is repeated here as a literal; keep it in sync
# with the name= passed to @mcp.tool on search_linkedin_jobs. This appears to
# be a module-level statement, so it would log on import as well — confirm.
logger.info("Tool registered: Linkedin_demo_search_linkedin_jobs")


if __name__ == "__main__":
    # Script entry point: announce startup, then hand control to the MCP server.
    # NOTE(review): the host and port in this message are literals and are not
    # passed to mcp.run() here; confirm they match the address the server
    # actually binds to (presumably configured elsewhere — verify).
    logger.info("Starting linkedin-jobs MCP server (streamable-http) on %s:%s", "0.0.0.0", 7860)
    mcp.run(transport="streamable-http")