aaronjosephd committed on
Commit
79c8e6d
·
1 Parent(s): e64f4d8

feat: disable similarity worker for NER-only benchmark

Browse files
Files changed (1) hide show
  1. main.py +3 -32
main.py CHANGED
@@ -321,40 +321,11 @@ async def analyze_resume(
321
  user_skills = [ent.text for ent in doc.ents if ent.label_ == "SKILL"]
322
  user_tools = [ent.text for ent in doc.ents if ent.label_ == "TOOL"]
323
 
324
- # --- Similarity Search (via Isolated Subprocess) ---
325
- # Fetch a large number of jobs to cache for pagination
326
  all_similar_jobs = []
327
  total_similar_jobs = 0
328
- try:
329
- backend_dir = pathlib.Path(__file__).parent.resolve()
330
- worker_path = backend_dir / "similarity_worker.py"
331
- worker_python_executable = "/app/similarity_env/bin/python3"
332
-
333
- process = subprocess.run(
334
- [
335
- str(worker_python_executable),
336
- str(worker_path),
337
- "--target_role",
338
- target_role or "Overall Market",
339
- "--limit",
340
- "200" # Fetch a large batch for caching
341
- ],
342
- input=similarity_cleaned_text,
343
- capture_output=True,
344
- text=True,
345
- check=True,
346
- )
347
-
348
- worker_output = json.loads(process.stdout)
349
- all_similar_jobs = worker_output.get("similar_jobs", [])
350
- total_similar_jobs = worker_output.get("total_jobs", 0)
351
-
352
- except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError) as e:
353
- print(f"Similarity worker failed: {e}", file=sys.stderr)
354
- if isinstance(e, subprocess.CalledProcessError):
355
- print(f"Worker stderr: {e.stderr}", file=sys.stderr)
356
- all_similar_jobs = []
357
- total_similar_jobs = 0
358
 
359
  # --- Cache the full results ---
360
  session_id = str(uuid.uuid4())
 
321
  user_skills = [ent.text for ent in doc.ents if ent.label_ == "SKILL"]
322
  user_tools = [ent.text for ent in doc.ents if ent.label_ == "TOOL"]
323
 
324
+ # --- Similarity Search (DISABLED for NER-only benchmarking) ---
 
325
  all_similar_jobs = []
326
  total_similar_jobs = 0
327
+ # The similarity worker subprocess call is bypassed for this benchmark.
328
+ # The original code for similarity search was here.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  # --- Cache the full results ---
331
  session_id = str(uuid.uuid4())