Jofthomas committed
Commit 0d299fc · 1 Parent(s): 4969b87
Files changed (1)
  1. app.py +48 -1
app.py CHANGED
@@ -11,6 +11,16 @@ import httpx
 from pydantic import BaseModel, Field, HttpUrl

 from fastmcp import FastMCP
+import logging
+
+# Logging configuration
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
+_numeric_level = getattr(logging, LOG_LEVEL, logging.INFO)
+logging.basicConfig(
+    level=_numeric_level,
+    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
+)
+logger = logging.getLogger("linkedin_mcp")


 mcp = FastMCP(
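The `getattr` lookup above resolves a level name such as `DEBUG` to `logging`'s numeric constant, and unrecognized values fall back to `INFO` rather than raising. A minimal standalone sketch of the same resolution pattern (the names tried here are illustrative):

    import logging

    # Resolve level names the way app.py does: unknown names fall back to INFO.
    for name in ("DEBUG", "WARNING", "VERBOSE"):
        level = getattr(logging, name.upper(), logging.INFO)
        print(name, "->", level)  # DEBUG -> 10, WARNING -> 30, VERBOSE -> 20 (fallback)

One caveat of this pattern: any attribute that exists on the `logging` module would pass the lookup, not only level names, though for typical LOG_LEVEL values that distinction rarely matters.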
@@ -70,6 +80,7 @@ def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:

     # Prefer list items with data-occludable-job-id when available
     cards = tree.css("li[data-occludable-job-id], .base-search-card, .job-search-card")
+    logger.debug("HTML parse: found %d potential job cards", len(cards))
     for card in cards:
         job_id = card.attributes.get("data-occludable-job-id")

@@ -148,6 +159,7 @@ def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
     if not jobs:
         anchors = tree.css("a[href*='/jobs/view/']")
         seen_ids: set[str] = set()
+        logger.debug("HTML parse fallback: scanning %d anchors with /jobs/view/", len(anchors))
         for a in anchors:
             href = a.attributes.get("href") or ""
             if not href:
@@ -176,6 +188,7 @@ def _parse_jobs_from_html(html_text: str) -> list[JobPosting]:
         except Exception:
             continue

+    logger.debug("HTML parse complete: %d jobs parsed", len(jobs))
     return jobs


@@ -271,18 +284,26 @@ def _search_page(
     params: dict,
 ) -> list[JobPosting]:
     base_url = "https://www.linkedin.com/jobs/search/?" + urlencode(params)
+    logger.debug("GET main page: %s", base_url)
     resp = client.get(base_url, follow_redirects=True, timeout=20.0)
     resp.raise_for_status()
+    logger.debug("Main page status=%d bytes=%d", resp.status_code, len(resp.content))
     jobs = _parse_jobs_from_html(resp.text)
+    logger.debug("Parsed %d jobs from main page", len(jobs))

     # If nothing parsed, try the fragment endpoint as a fallback regardless of page
     if len(jobs) == 0:
         fragment_url = (
             "https://www.linkedin.com/jobs-guest/jobs/api/seeMoreJobPostings/search?" + urlencode(params)
         )
+        logger.debug("GET fragment fallback: %s", fragment_url)
         frag_resp = client.get(fragment_url, follow_redirects=True, timeout=20.0)
         if frag_resp.status_code == 200:
+            logger.debug("Fragment status=%d bytes=%d", frag_resp.status_code, len(frag_resp.content))
             jobs = _parse_jobs_from_html(frag_resp.text)
+        else:
+            logger.debug("Fragment request returned status=%d", frag_resp.status_code)

     return jobs
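A note on the style of these calls: values are passed as arguments (`logger.debug("... %d", len(...))`) rather than pre-formatted, so `logging` performs the `%` interpolation only when the record will actually be emitted. A small illustration of the difference (names are illustrative):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("demo")
    body = b"x" * 2048

    # Deferred: at INFO level this DEBUG record is dropped before any
    # string formatting happens.
    log.debug("page fetched, %d bytes", len(body))

    # Eager: the f-string is built even though the record is then dropped.
    log.debug(f"page fetched, {len(body)} bytes")

The arguments themselves (here `len(body)`) are still evaluated either way; only the string formatting is deferred.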
@@ -326,6 +347,20 @@ def search_linkedin_jobs(

     with httpx.Client(headers=headers) as client:
         start = 0
+        logger.info(
+            "Search start: query=%r location=%r limit=%d pages=%d sort_by=%s date_posted=%s exp=%s job_types=%s remote=%s geo_id=%s cookie_present=%s",
+            query,
+            location,
+            limit,
+            pages,
+            sort_by,
+            date_posted,
+            experience_levels,
+            job_types,
+            remote,
+            geo_id,
+            bool(cookie),
+        )
         for _page in range(pages):
             active_params = _build_search_params(
                 keywords=query,
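Worth noting in the hunk above: the start-of-search line logs `bool(cookie)` rather than the cookie itself, so the session credential never reaches log output. The same redaction idea in isolation (the cookie value is hypothetical):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("demo")

    cookie = "hypothetical-session-cookie"  # never log this value directly

    # Record only whether a credential was supplied, not what it is.
    log.info("cookie_present=%s", bool(cookie))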
@@ -340,27 +375,39 @@ def search_linkedin_jobs(
             )

             try:
+                logger.debug("Page fetch: start=%d params=%s", start, active_params)
                 jobs = _search_page(client, params=active_params)
             except httpx.HTTPStatusError as e:
                 status = e.response.status_code
+                try:
+                    failed_url = str(e.request.url)
+                except Exception:
+                    failed_url = "<unknown>"
+                logger.warning("HTTP error status=%d url=%s", status, failed_url)
                 if status in (401, 403, 429):
+                    logger.info("Stopping due to auth/rate limit status=%d", status)
                     break
                 raise
-            except Exception:
+            except Exception as ex:
+                logger.exception("Unexpected error during page fetch: %s", ex)
                 jobs = []

             if not jobs:
+                logger.info("No jobs parsed for start=%d; stopping further requests", start)
                 break

             all_jobs.extend(jobs)
             if len(all_jobs) >= max_items:
+                logger.info("Reached max_items=%d; stopping pagination", max_items)
                 break

             start += 25
             time.sleep(0.8)

+    logger.info("Search complete: returning %d jobs", len(all_jobs[:max_items]))
     return all_jobs[:max_items]


 if __name__ == "__main__":
+    logger.info("Starting linkedin-jobs MCP server")
     mcp.run(transport="http")
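For context on the handler above: `httpx.HTTPStatusError`, which `raise_for_status()` raises inside `_search_page`, carries both the originating `request` and the `response`, which is what `e.response.status_code` and `e.request.url` rely on. A minimal reproduction (httpbin.org is used purely for illustration and needs network access):

    import httpx

    try:
        resp = httpx.get("https://httpbin.org/status/429")
        resp.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
    except httpx.HTTPStatusError as e:
        print(e.response.status_code)  # 429
        print(str(e.request.url))      # https://httpbin.org/status/429

Since both attributes are attached when httpx constructs the error, the defensive try/except around `e.request.url` in the diff appears to be belt-and-braces rather than strictly necessary.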
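With this change, verbosity can be raised without touching code. Assuming the Space's usual entry point, something like:

    LOG_LEVEL=DEBUG python app.py

enables the per-request and per-parse debug lines, while leaving INFO as the default when the variable is unset.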