pquiggles committed on
Commit
9347485
·
1 Parent(s): ad2d836

fixed a lot of stuff to meet spencer's requirements

Browse files
Files changed (28) hide show
  1. pyproject.toml +1 -0
  2. src/notebooks/classifiers/education_classifier.ipynb +28 -28
  3. src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb +44 -5
  4. src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb +62 -5
  5. src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb +58 -5
  6. src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb +90 -5
  7. src/vsp/app/1st_gradio.py +111 -0
  8. src/vsp/app/bindings.py +4 -1
  9. src/vsp/app/classifiers/education_classifier.py +0 -3
  10. src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py +2 -9
  11. src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py +5 -11
  12. src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py +1 -11
  13. src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +1 -10
  14. src/vsp/app/main.py +98 -5
  15. src/vsp/app/model/linkedin/linkedin_models.py +3 -6
  16. src/vsp/app/prompts/education_classifier/1 - education_classifier_human.txt +1 -1
  17. src/vsp/app/prompts/education_classifier/1 - education_classifier_system.txt +0 -1
  18. src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt +1 -1
  19. src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt +6 -5
  20. src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt +2 -2
  21. src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt +8 -5
  22. src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt +2 -2
  23. src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt +9 -5
  24. src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt +1 -1
  25. src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt +1 -3
  26. src/vsp/app/scrapers/linkedin_downloader.py +61 -1
  27. tests/vsp/app/test_main.py +0 -206
  28. uv.lock +0 -0
pyproject.toml CHANGED
@@ -6,6 +6,7 @@ requires-python = ">=3.12"
6
  dependencies = [
7
  "aiohttp>=3.10.5",
8
  "boto3>=1.35.12",
 
9
  "ipykernel>=6.29.5",
10
  "openai>=1.43.0",
11
  "pydantic>=2.8.2",
 
6
  dependencies = [
7
  "aiohttp>=3.10.5",
8
  "boto3>=1.35.12",
9
+ "gradio>=4.44.0",
10
  "ipykernel>=6.29.5",
11
  "openai>=1.43.0",
12
  "pydantic>=2.8.2",
src/notebooks/classifiers/education_classifier.ipynb CHANGED
@@ -2,14 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
@@ -30,7 +30,7 @@
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
- "with open(\"../tests/test_data/hansae_catlett.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
@@ -39,50 +39,50 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 2,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "\u001b[2m2024-09-11 14:33:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
- "\u001b[2m2024-09-11 14:33:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
- "\u001b[2m2024-09-11 14:33:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
- "\u001b[2m2024-09-11 14:33:12\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
- "\u001b[2m2024-09-11 14:33:12\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
- "\u001b[2m2024-09-11 14:33:12\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
- "\u001b[2m2024-09-11 14:33:12\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
- "\u001b[2m2024-09-11 14:33:12\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
- "\u001b[2m2024-09-11 14:33:13\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
- "\u001b[2m2024-09-11 14:33:13\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
- "\u001b[2m2024-09-11 14:33:14\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
- "\u001b[2m2024-09-11 14:33:14\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1018\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
61
- "\u001b[2m2024-09-11 14:33:14\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
- "\u001b[2m2024-09-11 14:33:14\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m975\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m140\u001b[0m\n",
63
- "\u001b[2m2024-09-11 14:33:15\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
- "\u001b[2m2024-09-11 14:33:15\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1064\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m109\u001b[0m\n",
65
- "\u001b[2m2024-09-11 14:33:15\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
- "\u001b[2m2024-09-11 14:33:15\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1026\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m111\u001b[0m\n",
67
  "{\n",
68
  " \"output\": \"Graduate School\",\n",
69
  " \"confidence\": 1.0,\n",
70
- " \"reasoning\": \"The specific Linkedin education item clearly states that the candidate obtained a Master of Public Policy (M.P.P.) from Stanford University, which aligns perfectly with the information provided in the full resume. The degree is a graduate-level qualification, and the period of study (2016 to 2019) matches the timeline in the resume. Additionally, the description of the candidate's work during this program further supports the classification as a graduate school education. Therefore, it is accurately classified as GRAD_SCHOOL.\"\n",
71
  "}\n",
72
  "{\n",
73
  " \"output\": \"Undergraduate (Incomplete)\",\n",
74
  " \"confidence\": 0.9,\n",
75
- " \"reasoning\": \"The specific Linkedin education item indicates that the candidate participated in a \\\"Study Abroad\\\" program at the University of New South Wales, focusing on Engineering, Philosophy, and Finance. This aligns with the resume, which shows that the candidate completed a Bachelor of Arts at Harvard University, suggesting that the study abroad experience was part of their undergraduate education. Since the degree is classified as \\\"Study Abroad\\\" and does not indicate completion of a separate degree, it falls under the category of \\\"Undergraduate (Incomplete).\\\" The high confidence level is due to the clear connection between the study abroad experience and the candidate's undergraduate studies.\"\n",
76
  "}\n",
77
  "{\n",
78
- " \"output\": \"MBA\",\n",
79
  " \"confidence\": 1.0,\n",
80
- " \"reasoning\": \"The specific Linkedin education item clearly states that the candidate obtained a Master of Business Administration (M.B.A.) from Stanford University Graduate School of Business, with the period of study from 2016 to 2019. This matches exactly with the information provided in the full resume, which also lists the same degree and institution. The presence of various activities and awards associated with the MBA program further supports the classification as an MBA. Therefore, the classification is confidently categorized as \\\"MBA.\\\"\"\n",
81
  "}\n",
82
  "{\n",
83
- " \"output\": \"Undergraduate (Completed)\",\n",
84
  " \"confidence\": 1.0,\n",
85
- " \"reasoning\": \"The specific Linkedin education item clearly states that the candidate earned a Bachelor of Arts degree in Biomedical Engineering and Philosophy from Harvard University, with a graduation period from 2007 to 2011. This aligns perfectly with the information provided in the full resume, which also lists the same degree and institution, confirming its completion. The description \\\"Graduated with Honors\\\" further supports that this is a completed undergraduate degree. Therefore, it is classified as \\\"Undergraduate (Completed).\\\"\"\n",
86
  "}\n"
87
  ]
88
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/hansae_catlett.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 4,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1100\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1057\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m103\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1108\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1146\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
67
  "{\n",
68
  " \"output\": \"Graduate School\",\n",
69
  " \"confidence\": 1.0,\n",
70
+ " \"reasoning\": \"The specific Linkedin education item indicates that the candidate earned a Master of Public Policy (M.P.P.) from Stanford University, which aligns with the completed graduate degree listed in the full resume. This classification fits the GRAD_SCHOOL category as it is a Master's degree in a field other than Business Administration or Law. The details provided confirm the completion of this degree during the specified period.\"\n",
71
  "}\n",
72
  "{\n",
73
  " \"output\": \"Undergraduate (Incomplete)\",\n",
74
  " \"confidence\": 0.9,\n",
75
+ " \"reasoning\": \"The specific Linkedin education item indicates a \\\"Study Abroad\\\" program at the University of New South Wales, which aligns with the incomplete undergraduate studies mentioned in the resume. The resume lists a completed undergraduate degree from Harvard University, but the study abroad experience does not constitute a completed degree itself, thus fitting the category of UNDERGRAD_INCOMPLETE. The confidence is high due to the clear distinction between completed and incomplete educational experiences.\"\n",
76
  "}\n",
77
  "{\n",
78
+ " \"output\": \"Undergraduate (Completed)\",\n",
79
  " \"confidence\": 1.0,\n",
80
+ " \"reasoning\": \"The specific Linkedin education item details a Bachelor of Arts degree in Biomedical Engineering and Philosophy from Harvard University, which aligns perfectly with the information provided in the full resume. The resume confirms the completion of this undergraduate degree, as it states the same degree and institution, along with the graduation period from 2007 to 2011. Therefore, it is classified as a completed undergraduate degree.\"\n",
81
  "}\n",
82
  "{\n",
83
+ " \"output\": \"MBA\",\n",
84
  " \"confidence\": 1.0,\n",
85
+ " \"reasoning\": \"The specific Linkedin education item clearly states that the candidate obtained a Master of Business Administration (M.B.A.) from Stanford University Graduate School of Business between 2016 and 2019. This aligns perfectly with the information in the full resume, which also lists the same degree and institution. The high confidence level is due to the direct match in degree type and institution, confirming the classification as an MBA.\"\n",
86
  "}\n"
87
  ]
88
  }
src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,40 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 3,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
53
+ "{\n",
54
+ " \"investment_banking_group\": \"Private Funds\",\n",
55
+ " \"confidence\": 0.9,\n",
56
+ " \"reasoning\": \"The candidate's role as an Investment Professional at Accel-KKR, a private equity firm focused on technology investments, aligns closely with the PRIVATE_FUNDS group. Their work involves acquisitions, buyouts, and structured investments, which are typical activities in private equity. Additionally, the candidate's experience at Fidelity Investments in corporate finance suggests a strong background in financial transactions, further supporting this classification.\"\n",
57
+ "}\n",
58
+ "{\n",
59
+ " \"investment_banking_group\": \"Generalist\",\n",
60
+ " \"confidence\": 0.8,\n",
61
+ " \"reasoning\": \"The candidate worked in the Investment Banking division at William Blair & Company, a firm known for providing a wide range of financial advisory services, which suggests a generalist role. The absence of specific details in the work experience description indicates that the candidate may have been involved in various transactions across different sectors rather than specializing in a particular area. Additionally, their subsequent role at Accel-KKR, a private equity firm focused on technology, further supports the idea of a generalist background prior to specializing.\"\n",
62
+ "}\n",
63
+ "{\n",
64
+ " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
65
+ " \"confidence\": 0.8,\n",
66
+ " \"reasoning\": \"The candidate worked in the FFAS Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the focus of the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their education in Business Economics, supports this classification, although the lack of specific details in the job description leaves some uncertainty.\"\n",
67
+ "}\n",
68
+ "{\n",
69
+ " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
70
+ " \"confidence\": 0.8,\n",
71
+ " \"reasoning\": \"The candidate worked in the PI Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their background in investment banking and private equity, supports this classification, although the lack of specific details in the work experience description leads to a slightly lower confidence level.\"\n",
72
+ "}\n"
73
+ ]
74
+ }
75
+ ],
76
  "source": [
77
  "import asyncio\n",
78
  "\n",
src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,58 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 3,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 4,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1084\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1158\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
67
+ "{\n",
68
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
69
+ " \"confidence\": 0.3,\n",
70
+ " \"reasoning\": \"The specific work experience at Fidelity Investments as \\\"FFAS Corporate Finance\\\" does not provide enough detail about the nature of the investments or financial activities undertaken during that time. Without a description of the specific focus or asset class related to this role, it is difficult to classify it accurately. The lack of information leads to a low confidence level in making a definitive classification.\",\n",
71
+ " \"other_description\": null\n",
72
+ "}\n",
73
+ "{\n",
74
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
75
+ " \"confidence\": 0.3,\n",
76
+ " \"reasoning\": \"The specific work experience item at Fidelity Investments as \\\"PI Corporate Finance\\\" does not provide any details about the nature of the investments or financial activities undertaken in that role. Without specific information regarding the focus on asset classes or investment strategies, it is difficult to classify this experience accurately. The lack of a description or responsibilities limits the ability to determine a clear investing focus or asset class.\",\n",
77
+ " \"other_description\": null\n",
78
+ "}\n",
79
+ "{\n",
80
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
81
+ " \"confidence\": 0.3,\n",
82
+ " \"reasoning\": \"The specific work experience at William Blair & Company is labeled as \\\"Investment Banking,\\\" but there is no detailed description of the responsibilities or types of transactions involved. Without specific information on whether the focus was on equity, debt, mergers, or acquisitions, it is difficult to classify this experience into one of the predefined investing focus or asset class categories. The lack of detail leads to uncertainty in classification.\",\n",
83
+ " \"other_description\": null\n",
84
+ "}\n",
85
+ "{\n",
86
+ " \"investing_focus_asset_class\": \"Private Equity / Buyouts\",\n",
87
+ " \"confidence\": 0.9,\n",
88
+ " \"reasoning\": \"The specific work experience at Accel-KKR indicates a focus on private equity investments, particularly in technology and software sectors. The description highlights typical transactions such as acquisitions, buyouts of divisions from public companies, and take-private transactions, which are all characteristic of private equity buyouts. This aligns well with the classification of PRIVATE_EQUITY_BUYOUTS. The high confidence level reflects the clear alignment of the job responsibilities with this asset class.\",\n",
89
+ " \"other_description\": null\n",
90
+ "}\n"
91
+ ]
92
+ }
93
+ ],
94
  "source": [
95
  "import asyncio\n",
96
  "\n",
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,54 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m88\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1080\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1154\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
67
+ "{\n",
68
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
69
+ " \"confidence\": 0.7,\n",
70
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"FFAS Corporate Finance\\\" suggests involvement in financial analysis and corporate finance activities, which aligns with the financial institutions sector. However, the lack of detailed responsibilities in the description leads to a slightly lower confidence level.\"\n",
71
+ "}\n",
72
+ "{\n",
73
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
+ " \"confidence\": 0.8,\n",
75
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"PI Corporate Finance\\\" suggests involvement in financial activities, likely related to investment management or corporate finance within the financial sector. While the description lacks detail, the context of the company and the role strongly supports classification in the financial institutions sector.\"\n",
76
+ "}\n",
77
+ "{\n",
78
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
79
+ " \"confidence\": 0.8,\n",
80
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked in Investment Banking at William Blair & Company, which is categorized under Financial Services. This aligns with the focus on financial institutions, as investment banking is a core component of this sector. The absence of a detailed description does not detract significantly from the classification, as the job title and company industry provide clear context.\"\n",
81
+ "}\n",
82
+ "{\n",
83
+ " \"investing_focus_sector\": \"Technology / Software / TMT\",\n",
84
+ " \"confidence\": 0.9,\n",
85
+ " \"reasoning\": \"The specific work experience at Accel-KKR clearly indicates a focus on investing in software and technology-enabled services companies. The description highlights that the firm is dedicated exclusively to this sector, which aligns directly with the TECHNOLOGY_SOFTWARE_TMT category. The confidence level is high due to the explicit mention of the firm's focus area in both the resume and the work experience item.\"\n",
86
+ "}\n"
87
+ ]
88
+ }
89
+ ],
90
  "source": [
91
  "import asyncio\n",
92
  "\n",
src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/hansae_catlett.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,86 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 3,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/hansae_catlett.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 4,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
59
+ "{\n",
60
+ " \"primary_job_type\": \"Full-time\",\n",
61
+ " \"secondary_job_type\": \"Investing\",\n",
62
+ " \"confidence\": 0.9,\n",
63
+ " \"reasoning\": \"The work experience as a Partner at HOF Capital is a full-time role in a venture capital firm, which aligns with the candidate's ongoing career in investing. Given that the candidate has been involved in venture capital and private equity roles, this position is classified as FULL_TIME. The secondary job type is classified as INVESTING, as the role directly involves investment activities typical of a venture capital firm. This classification is supported by the candidate's previous experience as Vice President at Bessemer Venture Partners and their role as Co-Founder at The MBA Fund, both of which are also in the investing domain.\"\n",
64
+ "}\n",
65
+ "{\n",
66
+ " \"primary_job_type\": \"Full-time\",\n",
67
+ " \"secondary_job_type\": \"Investing\",\n",
68
+ " \"confidence\": 0.9,\n",
69
+ " \"reasoning\": \"The work experience at The MBA Fund is classified as FULL_TIME because it is a co-founding role in a venture capital firm, indicating ongoing and regular employment. The responsibilities described involve significant engagement with startups and investment activities, which aligns with a full-time commitment. For the secondary job type, it is classified as INVESTING since the role involves backing and supporting startup founders, which is characteristic of venture capital activities. This is supported by the description of the firm and the candidate's role as a General Partner, which typically involves making investment decisions and managing a portfolio of investments.\"\n",
70
+ "}\n",
71
+ "{\n",
72
+ " \"primary_job_type\": \"Full-time\",\n",
73
+ " \"secondary_job_type\": \"Investing\",\n",
74
+ " \"confidence\": 1.0,\n",
75
+ " \"reasoning\": \"The work experience as Vice President at Bessemer Venture Partners is classified as FULL_TIME because it is a full-time position that spans over four years, which aligns with the criteria for regular ongoing employment. The secondary job type is classified as INVESTING, as the role is within a venture capital firm, which involves professional investing activities. This is supported by the candidate's extensive background in venture capital and private equity, as indicated in both the resume and the specific work experience item.\"\n",
76
+ "}\n",
77
+ "{\n",
78
+ " \"primary_job_type\": \"Other\",\n",
79
+ " \"secondary_job_type\": \"Other\",\n",
80
+ " \"confidence\": 0.9,\n",
81
+ " \"reasoning\": \"The work experience as a Board Observer at Rillavoice is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time employment role and does not fit the criteria for an internship, it is classified as OTHER for the primary job type. For the secondary job type, since the role involves oversight and strategic input in a software company, it does not fit into the categories of investing, banking, or consulting, thus it is classified as OTHER as well.\"\n",
82
+ "}\n",
83
+ "{\n",
84
+ " \"primary_job_type\": \"Other\",\n",
85
+ " \"secondary_job_type\": \"Investing\",\n",
86
+ " \"confidence\": 0.9,\n",
87
+ " \"reasoning\": \"The work experience at Archy is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital as seen in their role as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund. The duration from 2022 to 2024 suggests a longer-term commitment, which is typical for investing roles rather than internships. The primary job type is classified as OTHER since there is no indication of full-time employment or internship status, and it does not fit into the other primary categories.\"\n",
88
+ "}\n",
89
+ "{\n",
90
+ " \"primary_job_type\": \"Advisory / Board / Independent Investor\",\n",
91
+ " \"secondary_job_type\": \"Other\",\n",
92
+ " \"confidence\": 0.9,\n",
93
+ " \"reasoning\": \"The work experience as a Board Observer at MaintainX is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time role and does not indicate a regular employment setup, it does not fit the FULL_TIME category. The duration from 2021 to 2024 suggests a longer-term commitment, but the nature of the role indicates it is more advisory in nature. Therefore, I classify it as ADVISORY_BOARD_INVESTOR for the primary job type. For the secondary job type, since the role involves oversight and strategic input rather than direct investment activities, it does not fit into the INVESTING category. Instead, it aligns more closely with the OTHER category, as it does not fit the other defined roles.\"\n",
94
+ "}\n",
95
+ "{\n",
96
+ " \"primary_job_type\": \"Full-time\",\n",
97
+ " \"secondary_job_type\": \"Investing\",\n",
98
+ " \"confidence\": 0.9,\n",
99
+ " \"reasoning\": \"The work experience at Spot AI is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital and investment roles as seen in their resume. The duration from 2021 to 2024 suggests a long-term commitment, which rules out the possibility of it being an internship. The primary job type is classified as FULL_TIME since the role is ongoing and likely compensated, fitting the criteria for regular employment.\"\n",
100
+ "}\n",
101
+ "{\n",
102
+ " \"primary_job_type\": \"Other\",\n",
103
+ " \"secondary_job_type\": \"Entrepreneur / Founder\",\n",
104
+ " \"confidence\": 0.9,\n",
105
+ " \"reasoning\": \"The work experience as a Board Observer at VendorPM is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that the candidate has been involved in this role from 2021 to 2024, it suggests a part-time or advisory capacity rather than a full-time position. Therefore, the Primary Job Type is classified as OTHER, as it does not fit the other categories. For the Secondary Job Type, this role is best classified as ENTREPRENEUR_FOUNDER since it involves oversight and advisory functions in a startup environment, which is common for board observer roles in venture-backed companies.\"\n",
106
+ "}\n",
107
+ "{\n",
108
+ " \"primary_job_type\": \"Full-time\",\n",
109
+ " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
110
+ " \"confidence\": 0.9,\n",
111
+ " \"reasoning\": \"The work experience as a Board Director at Luxury Presence is a long-term role from 2021 to 2024, indicating a significant commitment that aligns with a full-time position rather than a temporary or part-time role. Given that this position involves governance and oversight, it does not fit the criteria for an internship or extracurricular activity. In terms of secondary job type, the role of Board Director typically involves strategic decision-making and oversight, which aligns with CORPDEV_STRATEGY as it is a corporate governance role rather than a direct investment role.\"\n",
112
+ "}\n",
113
+ "{\n",
114
+ " \"primary_job_type\": \"Other\",\n",
115
+ " \"secondary_job_type\": \"Investing\",\n",
116
+ " \"confidence\": 0.9,\n",
117
+ " \"reasoning\": \"The work experience at ServiceTitan is classified as an INVESTING role because the title \\\"Investor\\\" indicates a focus on investment activities, which aligns with the candidate's background in venture capital and investment roles as seen in their resume. The candidate has been involved in various investment capacities, including their role as Vice President at Bessemer Venture Partners and as a Co-Founder at The MBA Fund, both of which emphasize their expertise in investing. The duration of the role from 2021 to 2024 suggests a longer-term commitment, which is typical for professional investing roles rather than internships or part-time positions. For the Primary Job Type, since this role is not a full-time position but rather an investment role, it is classified as OTHER, as it does not fit the other primary categories like FULL_TIME or INTERNSHIP.\"\n",
118
+ "}\n"
119
+ ]
120
+ }
121
+ ],
122
  "source": [
123
  "import asyncio\n",
124
  "\n",
src/vsp/app/1st_gradio.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nest_asyncio
3
+
4
+ # Apply nest_asyncio to allow nested event loops
5
+ nest_asyncio.apply()
6
+
7
+ # Import your custom modules
8
+ from vsp.app.scrapers.linkedin_downloader import LinkedinDownloader
9
+ from vsp.app.main import VspDataEnrichment
10
+
11
async def process_profile(profile_linkedin):
    """Download a LinkedIn profile and run it through the enrichment pipeline.

    Args:
        profile_linkedin: LinkedIn profile URL; forwarded to the downloader
            as its ``linkedin_url`` keyword argument.

    Returns:
        The value produced by ``VspDataEnrichment.process_linkedin_profile``
        for the downloaded profile.
    """
    # Both collaborators are constructed per call, downloader first, exactly
    # as before; nothing is cached between invocations.
    linkedin_client = LinkedinDownloader()
    enrichment = VspDataEnrichment()

    raw_profile = await linkedin_client.fetch_linkedin_data(linkedin_url=profile_linkedin)
    return await enrichment.process_linkedin_profile(profile=raw_profile)
17
+
18
def _year_or(date, fallback):
    """Return ``date.year``, or *fallback* when the date or its year is missing."""
    year = date.year if date else None
    return year if year else fallback


def _format_education(idx, edu):
    """Render one classified education entry as a Markdown section."""
    return (
        f"### Education {idx}\n"
        f"**School:** {edu.education.school_name}\n\n"
        f"**Degree:** {edu.education.degree}\n\n"
        f"**Year:** {_year_or(edu.education.end, 'N/A')}\n\n"
        f"**Classification:** {edu.classification.output.value}\n"
    )


def _format_experience_summary(enriched_profile):
    """Render total full-time experience plus the per-secondary-type breakdown."""
    parts = [
        f"### Total Full-Time Work Experience: {enriched_profile.full_time_work_experience_years} years\n\n"
    ]
    by_secondary = enriched_profile.full_time_work_experience_by_secondary
    if by_secondary:
        parts.append("### Work Experience by Secondary Job Type:\n")
        parts.extend(
            f"- {job_type.value}: {years} years\n" for job_type, years in by_secondary.items()
        )
    return "".join(parts)


def _format_work_experience(idx, exp):
    """Render one classified work experience entry as a Markdown section."""
    position = exp.position
    general = exp.work_experience_classification
    time_range = f"{_year_or(position.start, 'N/A')} - {_year_or(position.end, 'Present')}"

    parts = [
        f"### Work Experience {idx}\n"
        f"**Company:** {position.company_name}\n\n"
        f"**Time Range:** {time_range}\n\n"
        f"**Title:** {position.title}\n\n"
        f"**Primary Job Type:** {general.primary_job_type.value}\n\n"
        f"**Secondary Job Type:** {general.secondary_job_type.value}\n\n"
    ]

    # Investing focus: the sector line is only shown alongside an asset-class
    # classification, and falls back to "N/A" when no sector was classified.
    asset_cls = exp.investing_focus_asset_class_classification
    if asset_cls:
        sector_cls = exp.investing_focus_sector_classification
        sector = sector_cls.investing_focus_sector.value if sector_cls else "N/A"
        parts.append(
            f"**Investing Focus (Asset Class):** {asset_cls.investing_focus_asset_class.value}\n\n"
        )
        parts.append(f"**Investing Focus (Sector):** {sector}\n\n")

    # Investment banking group, when that classifier produced a result.
    ib_cls = exp.investment_banking_classification
    if ib_cls:
        parts.append(f"**Investment Banking Group:** {ib_cls.investment_banking_group.value}\n")

    return "".join(parts)


async def analyze_profile(profile_linkedin):
    """Enrich a LinkedIn profile and render the classifications as Markdown.

    Args:
        profile_linkedin: LinkedIn profile URL, passed to ``process_profile``.

    Returns:
        A Markdown string with a "Classified Educations" section followed by
        a "Classified Work Experiences" section (experience totals first,
        then one sub-section per position).

    Note:
        A missing year on an education end date or a position start date is
        rendered as "N/A" (and a missing position end year as "Present")
        instead of the literal text "None".
    """
    enriched_profile = await process_profile(profile_linkedin)

    education_output = "\n\n".join(
        _format_education(idx, edu)
        for idx, edu in enumerate(enriched_profile.classified_educations, 1)
    )
    experience_output = _format_experience_summary(enriched_profile)
    work_experience_output = "\n\n".join(
        _format_work_experience(idx, exp)
        for idx, exp in enumerate(enriched_profile.classified_work_experiences, 1)
    )

    return (
        f"# Classified Educations\n\n{education_output}\n\n"
        f"# Classified Work Experiences\n\n{experience_output}\n\n{work_experience_output}"
    )
87
+
88
def main():
    """Build the Gradio Blocks UI for the profile analyzer and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown("# LinkedIn Profile Analyzer")
        gr.Markdown("Enter a LinkedIn profile URL to analyze educational and work experiences.")

        url_box = gr.Textbox(label="LinkedIn Profile URL")
        run_button = gr.Button("Analyze")
        result_view = gr.Markdown()

        async def on_analyze_click(profile_linkedin):
            """Return the Markdown report for the URL, or a user-facing message."""
            if profile_linkedin:
                try:
                    return await analyze_profile(profile_linkedin)
                except Exception as e:
                    # UI boundary: show the failure in the output pane rather
                    # than letting the exception propagate out of the handler.
                    return f"An error occurred: {str(e)}"
            return "Please enter a valid LinkedIn Profile URL."

        run_button.click(fn=on_analyze_click, inputs=url_box, outputs=result_view)

    # NOTE(review): indentation was lost in the source this was recovered
    # from; launch is placed after the Blocks context here — confirm against
    # the original file.
    demo.launch()


# Start the UI only when executed as a script.
if __name__ == "__main__":
    main()
src/vsp/app/bindings.py CHANGED
@@ -5,7 +5,10 @@ from vsp.llm.llm_cache import LLMCache
5
  from vsp.llm.openai.openai import AsyncOpenAIService
6
  from vsp.llm.openai.openai_model import OpenAIModel
7
 
 
 
8
  prompt_loader = PromptLoader()
9
  llm_cache = LLMCache()
10
- open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_MINI), cache=llm_cache)
 
11
  linkedin_downloader = LinkedinDownloader()
 
5
  from vsp.llm.openai.openai import AsyncOpenAIService
6
  from vsp.llm.openai.openai_model import OpenAIModel
7
 
8
+
9
+
10
  prompt_loader = PromptLoader()
11
  llm_cache = LLMCache()
12
+ open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_O), cache=llm_cache)
13
+
14
  linkedin_downloader = LinkedinDownloader()
src/vsp/app/classifiers/education_classifier.py CHANGED
@@ -44,12 +44,10 @@ class EducationClassification(BaseModel):
44
 
45
  Attributes:
46
  output (SchoolType): The classified school type.
47
- confidence (float): Confidence level of the classification, between 0.0 and 1.0.
48
  reasoning (str): Explanation for the classification decision.
49
  """
50
 
51
  output: SchoolType = Field(description="The classified school type")
52
- confidence: float = Field(description="Confidence level between 0.0 and 1.0")
53
  reasoning: str = Field(description="Explanation for the classification")
54
  model_config = {"frozen": True} # This makes the model immutable and hashable
55
 
@@ -88,7 +86,6 @@ class EducationClassifier:
88
  case school_type if school_type in _SCHOOL_TYPE_MAPPING:
89
  return EducationClassification(
90
  output=_SCHOOL_TYPE_MAPPING[school_type],
91
- confidence=float(parsed["confidence"]),
92
  reasoning=parsed["reasoning"],
93
  )
94
  case _:
 
44
 
45
  Attributes:
46
  output (SchoolType): The classified school type.
 
47
  reasoning (str): Explanation for the classification decision.
48
  """
49
 
50
  output: SchoolType = Field(description="The classified school type")
 
51
  reasoning: str = Field(description="Explanation for the classification")
52
  model_config = {"frozen": True} # This makes the model immutable and hashable
53
 
 
86
  case school_type if school_type in _SCHOOL_TYPE_MAPPING:
87
  return EducationClassification(
88
  output=_SCHOOL_TYPE_MAPPING[school_type],
 
89
  reasoning=parsed["reasoning"],
90
  )
91
  case _:
src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py CHANGED
@@ -32,7 +32,7 @@ class PrimaryJobType(str, Enum):
32
  """Enumeration of different primary job types for work experience classification."""
33
 
34
  FULL_TIME = "Full-time"
35
- ADVISORY_BOARD_INVESTOR = "Advisory / Board / Independent Investor"
36
  INTERNSHIP = "Internship"
37
  EXTRACURRICULAR = "Extracurricular"
38
  EDUCATION = "Education"
@@ -65,13 +65,11 @@ class WorkExperienceClassification(BaseModel):
65
  Attributes:
66
  primary_job_type (PrimaryJobType): The classified primary job type.
67
  secondary_job_type (SecondaryJobType): The classified secondary job type.
68
- confidence (float): Confidence level of the classification, between 0.0 and 1.0.
69
  reasoning (str): Explanation for the classification decision.
70
  """
71
 
72
  primary_job_type: PrimaryJobType = Field(description="The classified primary job type")
73
  secondary_job_type: SecondaryJobType = Field(description="The classified secondary job type")
74
- confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
75
  reasoning: str = Field(description="Explanation for the classification")
76
  model_config = {"frozen": True} # This makes the model immutable and hashable
77
 
@@ -117,7 +115,7 @@ class WorkExperienceClassifier:
117
  WorkExperienceClassification: A structured representation of the classification result.
118
 
119
  Raises:
120
- ValueError: If the output contains an unknown job type or invalid confidence value.
121
  """
122
  lines = output.strip().split("\n")
123
  parsed: dict[str, Any] = {}
@@ -136,15 +134,10 @@ class WorkExperienceClassifier:
136
  except KeyError as e:
137
  raise ValueError(f"Unknown job type: {str(e)}")
138
 
139
- try:
140
- confidence = float(parsed["confidence"])
141
- except ValueError:
142
- raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
143
 
144
  return WorkExperienceClassification(
145
  primary_job_type=PrimaryJobType(primary_job_type),
146
  secondary_job_type=SecondaryJobType(secondary_job_type),
147
- confidence=confidence,
148
  reasoning=parsed["reasoning"],
149
  )
150
 
 
32
  """Enumeration of different primary job types for work experience classification."""
33
 
34
  FULL_TIME = "Full-time"
35
+ ADVISORY_BOARD_INVESTOR = "Advisory / Board / Investor"
36
  INTERNSHIP = "Internship"
37
  EXTRACURRICULAR = "Extracurricular"
38
  EDUCATION = "Education"
 
65
  Attributes:
66
  primary_job_type (PrimaryJobType): The classified primary job type.
67
  secondary_job_type (SecondaryJobType): The classified secondary job type.
 
68
  reasoning (str): Explanation for the classification decision.
69
  """
70
 
71
  primary_job_type: PrimaryJobType = Field(description="The classified primary job type")
72
  secondary_job_type: SecondaryJobType = Field(description="The classified secondary job type")
 
73
  reasoning: str = Field(description="Explanation for the classification")
74
  model_config = {"frozen": True} # This makes the model immutable and hashable
75
 
 
115
  WorkExperienceClassification: A structured representation of the classification result.
116
 
117
  Raises:
118
+ ValueError: If the output contains an unknown job type.
119
  """
120
  lines = output.strip().split("\n")
121
  parsed: dict[str, Any] = {}
 
134
  except KeyError as e:
135
  raise ValueError(f"Unknown job type: {str(e)}")
136
 
 
 
 
 
137
 
138
  return WorkExperienceClassification(
139
  primary_job_type=PrimaryJobType(primary_job_type),
140
  secondary_job_type=SecondaryJobType(secondary_job_type),
 
141
  reasoning=parsed["reasoning"],
142
  )
143
 
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py CHANGED
@@ -42,7 +42,7 @@ class InvestingFocusAssetClass(str, Enum):
42
  CREDIT = "Credit"
43
  SECONDARIES = "Secondaries"
44
  OTHER = "Other"
45
- UNCLASSIFIABLE = "Unclassifiable"
46
 
47
 
48
  _INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
@@ -56,13 +56,11 @@ class InvestingFocusAssetClassClassification(BaseModel):
56
 
57
  Attributes:
58
  investing_focus_asset_class (InvestingFocusAssetClass): The classified investing focus or asset class.
59
- confidence (float): Confidence level of the classification, between 0.0 and 1.0.
60
  reasoning (str): Explanation for the classification decision.
61
  other_description (str | None): Description for 'Other' classification, if applicable.
62
  """
63
 
64
  investing_focus_asset_class: InvestingFocusAssetClass = Field(description="The investing focus or asset class")
65
- confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
66
  reasoning: str = Field(description="Explanation for the classification")
67
  other_description: str | None = Field(default=None, description="Description for 'Other' classification")
68
  model_config = {"frozen": True} # This makes the model immutable and hashable
@@ -111,7 +109,7 @@ class InvestingFocusAssetClassClassifier:
111
  InvestingFocusAssetClassClassification: A structured representation of the classification result.
112
 
113
  Raises:
114
- ValueError: If the output contains an unknown investing focus or asset class or invalid confidence value.
115
  """
116
  lines = output.strip().split("\n")
117
  parsed: dict[str, Any] = {}
@@ -126,14 +124,10 @@ class InvestingFocusAssetClassClassifier:
126
  except KeyError as e:
127
  raise ValueError(f"Unknown investing focus or asset class: {str(e)}")
128
 
129
- try:
130
- confidence = float(parsed["confidence"])
131
- except ValueError:
132
- raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
133
 
134
  classification = InvestingFocusAssetClassClassification(
135
  investing_focus_asset_class=investing_focus_asset_class,
136
- confidence=confidence,
137
  reasoning=parsed["reasoning"],
138
  )
139
 
@@ -141,7 +135,6 @@ class InvestingFocusAssetClassClassifier:
141
  other_description = parsed.get("other_description")
142
  classification = InvestingFocusAssetClassClassification(
143
  investing_focus_asset_class=investing_focus_asset_class,
144
- confidence=confidence,
145
  reasoning=parsed["reasoning"],
146
  other_description=other_description,
147
  )
@@ -174,4 +167,5 @@ class InvestingFocusAssetClassClassifier:
174
  resume=format_profile_as_resume(linkedin_profile),
175
  work_experience=format_position(work_experience),
176
  )
177
- return await prompt.evaluate() # type: ignore
 
 
42
  CREDIT = "Credit"
43
  SECONDARIES = "Secondaries"
44
  OTHER = "Other"
45
+ INSUFFICIENT_INFORMATION = "Insufficient Information"
46
 
47
 
48
  _INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
 
56
 
57
  Attributes:
58
  investing_focus_asset_class (InvestingFocusAssetClass): The classified investing focus or asset class.
 
59
  reasoning (str): Explanation for the classification decision.
60
  other_description (str | None): Description for 'Other' classification, if applicable.
61
  """
62
 
63
  investing_focus_asset_class: InvestingFocusAssetClass = Field(description="The investing focus or asset class")
 
64
  reasoning: str = Field(description="Explanation for the classification")
65
  other_description: str | None = Field(default=None, description="Description for 'Other' classification")
66
  model_config = {"frozen": True} # This makes the model immutable and hashable
 
109
  InvestingFocusAssetClassClassification: A structured representation of the classification result.
110
 
111
  Raises:
112
+ ValueError: If the output contains an unknown investing focus or asset class.
113
  """
114
  lines = output.strip().split("\n")
115
  parsed: dict[str, Any] = {}
 
124
  except KeyError as e:
125
  raise ValueError(f"Unknown investing focus or asset class: {str(e)}")
126
 
127
+
 
 
 
128
 
129
  classification = InvestingFocusAssetClassClassification(
130
  investing_focus_asset_class=investing_focus_asset_class,
 
131
  reasoning=parsed["reasoning"],
132
  )
133
 
 
135
  other_description = parsed.get("other_description")
136
  classification = InvestingFocusAssetClassClassification(
137
  investing_focus_asset_class=investing_focus_asset_class,
 
138
  reasoning=parsed["reasoning"],
139
  other_description=other_description,
140
  )
 
167
  resume=format_profile_as_resume(linkedin_profile),
168
  work_experience=format_position(work_experience),
169
  )
170
+ result = await prompt.evaluate() # type: ignore
171
+ return result
src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py CHANGED
@@ -42,7 +42,7 @@ class InvestingFocusSector(str, Enum):
42
  FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
43
  INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"
44
  OTHER = "Other"
45
- UNCLASSIFIABLE = "Unclassifiable"
46
 
47
 
48
  _INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = {
@@ -56,12 +56,10 @@ class InvestingFocusSectorClassification(BaseModel):
56
 
57
  Attributes:
58
  investing_focus_sector (InvestingFocusSector): The classified investing focus sector.
59
- confidence (float): Confidence level of the classification, between 0.0 and 1.0.
60
  reasoning (str): Explanation for the classification decision.
61
  """
62
 
63
  investing_focus_sector: InvestingFocusSector = Field(description="The investing focus sector")
64
- confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
65
  reasoning: str = Field(description="Explanation for the classification")
66
  model_config = {"frozen": True} # This makes the model immutable and hashable
67
 
@@ -109,8 +107,6 @@ class InvestingFocusSectorClassifier:
109
  Returns:
110
  InvestingFocusSectorClassification: A structured representation of the classification result.
111
 
112
- Raises:
113
- ValueError: If the output contains an unknown investing focus sector or invalid confidence value.
114
  """
115
  lines = output.strip().split("\n")
116
  parsed: dict[str, Any] = {}
@@ -125,14 +121,8 @@ class InvestingFocusSectorClassifier:
125
  except KeyError as e:
126
  raise ValueError(f"Unknown investing focus sector: {str(e)}")
127
 
128
- try:
129
- confidence = float(parsed["confidence"])
130
- except ValueError:
131
- raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
132
-
133
  classification = InvestingFocusSectorClassification(
134
  investing_focus_sector=investing_focus_sector,
135
- confidence=confidence,
136
  reasoning=parsed["reasoning"],
137
  )
138
  return classification
 
42
  FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
43
  INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"
44
  OTHER = "Other"
45
+ INSUFFICIENT_INFORMATION = "Insufficent Information"
46
 
47
 
48
  _INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = {
 
56
 
57
  Attributes:
58
  investing_focus_sector (InvestingFocusSector): The classified investing focus sector.
 
59
  reasoning (str): Explanation for the classification decision.
60
  """
61
 
62
  investing_focus_sector: InvestingFocusSector = Field(description="The investing focus sector")
 
63
  reasoning: str = Field(description="Explanation for the classification")
64
  model_config = {"frozen": True} # This makes the model immutable and hashable
65
 
 
107
  Returns:
108
  InvestingFocusSectorClassification: A structured representation of the classification result.
109
 
 
 
110
  """
111
  lines = output.strip().split("\n")
112
  parsed: dict[str, Any] = {}
 
121
  except KeyError as e:
122
  raise ValueError(f"Unknown investing focus sector: {str(e)}")
123
 
 
 
 
 
 
124
  classification = InvestingFocusSectorClassification(
125
  investing_focus_sector=investing_focus_sector,
 
126
  reasoning=parsed["reasoning"],
127
  )
128
  return classification
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py CHANGED
@@ -50,7 +50,7 @@ class InvestmentBankingGroup(str, Enum):
50
  FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
51
  INFRASTRUCTURE = "Infrastructure / Transportation"
52
  OTHER = "Other"
53
- UNCLASSIFIABLE = "Unclassifiable"
54
 
55
 
56
  _INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
@@ -64,12 +64,10 @@ class InvestmentBankingGroupClassification(BaseModel):
64
 
65
  Attributes:
66
  investment_banking_group (InvestmentBankingGroup): The classified investment banking group.
67
- confidence (float): Confidence level of the classification, between 0.0 and 1.0.
68
  reasoning (str): Explanation for the classification decision.
69
  """
70
 
71
  investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
72
- confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
73
  reasoning: str = Field(description="Explanation for the classification")
74
  model_config = {"frozen": True} # This makes the model immutable and hashable
75
 
@@ -116,8 +114,6 @@ class InvestmentBankingGroupClassifier:
116
  Returns:
117
  InvestmentBankingGroupClassification: A structured representation of the classification result.
118
 
119
- Raises:
120
- ValueError: If the output contains an unknown investment banking group or invalid confidence value.
121
  """
122
  lines = output.strip().split("\n")
123
  parsed: dict[str, Any] = {}
@@ -132,14 +128,9 @@ class InvestmentBankingGroupClassifier:
132
  except KeyError as e:
133
  raise ValueError(f"Unknown investment banking group: {str(e)}")
134
 
135
- try:
136
- confidence = float(parsed["confidence"])
137
- except ValueError:
138
- raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
139
 
140
  return InvestmentBankingGroupClassification(
141
  investment_banking_group=investment_banking_group,
142
- confidence=confidence,
143
  reasoning=parsed["reasoning"],
144
  )
145
 
 
50
  FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
51
  INFRASTRUCTURE = "Infrastructure / Transportation"
52
  OTHER = "Other"
53
+ INSUFFICIENT_INFORMATION = "Insufficient Information"
54
 
55
 
56
  _INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
 
64
 
65
  Attributes:
66
  investment_banking_group (InvestmentBankingGroup): The classified investment banking group.
 
67
  reasoning (str): Explanation for the classification decision.
68
  """
69
 
70
  investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
 
71
  reasoning: str = Field(description="Explanation for the classification")
72
  model_config = {"frozen": True} # This makes the model immutable and hashable
73
 
 
114
  Returns:
115
  InvestmentBankingGroupClassification: A structured representation of the classification result.
116
 
 
 
117
  """
118
  lines = output.strip().split("\n")
119
  parsed: dict[str, Any] = {}
 
128
  except KeyError as e:
129
  raise ValueError(f"Unknown investment banking group: {str(e)}")
130
 
 
 
 
 
131
 
132
  return InvestmentBankingGroupClassification(
133
  investment_banking_group=investment_banking_group,
 
134
  reasoning=parsed["reasoning"],
135
  )
136
 
src/vsp/app/main.py CHANGED
@@ -17,12 +17,18 @@ Usage:
17
  """
18
 
19
  import asyncio
20
- from typing import Sequence
 
 
 
 
21
 
22
  from pydantic import BaseModel, Field
23
 
24
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
25
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
 
 
26
  WorkExperienceClassification,
27
  WorkExperienceClassifier,
28
  )
@@ -41,6 +47,8 @@ from vsp.app.classifiers.work_experience.investment_banking_group_classifier imp
41
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
42
 
43
 
 
 
44
  class ClassifiedEducation(BaseModel):
45
  """
46
  Represents a classified education item from a LinkedIn profile.
@@ -84,6 +92,8 @@ class LinkedinProfileClassificationResults(BaseModel):
84
 
85
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
86
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
 
 
87
 
88
 
89
  class VspDataEnrichment:
@@ -109,6 +119,83 @@ class VspDataEnrichment:
109
  self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
110
  self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
113
  """
114
  Process a LinkedIn profile and classify its education and work experiences.
@@ -155,15 +242,18 @@ class VspDataEnrichment:
155
  work_classification.primary_job_type.INTERNSHIP,
156
  work_classification.primary_job_type.EXTRACURRICULAR,
157
  }:
 
158
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
159
  ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
160
  profile, position
161
  )
162
  classified_work_experience.investment_banking_classification = ib_classification
163
 
164
- if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
165
- asset_class_task = self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
166
- profile, position
 
 
167
  )
168
  sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
169
  profile, position
@@ -176,8 +266,11 @@ class VspDataEnrichment:
176
 
177
  classified_work_experiences.append(classified_work_experience)
178
 
 
 
 
179
  return LinkedinProfileClassificationResults(
180
- classified_educations=classified_educations, classified_work_experiences=classified_work_experiences
181
  )
182
 
183
 
 
17
  """
18
 
19
  import asyncio
20
+ from typing import Sequence, Mapping, List
21
+
22
+ import calendar
23
+ from datetime import date
24
+
25
 
26
  from pydantic import BaseModel, Field
27
 
28
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
29
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
30
+ PrimaryJobType,
31
+ SecondaryJobType,
32
  WorkExperienceClassification,
33
  WorkExperienceClassifier,
34
  )
 
47
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
48
 
49
 
50
+ from collections import defaultdict
51
+
52
  class ClassifiedEducation(BaseModel):
53
  """
54
  Represents a classified education item from a LinkedIn profile.
 
92
 
93
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
94
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
95
+ full_time_work_experience_years: float = Field(default=0.0)
96
+ full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)
97
 
98
 
99
  class VspDataEnrichment:
 
119
  self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
120
  self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
121
 
122
+
123
def estimate_full_time_experience_by_secondary_job_type(
    self,
    classified_work_experiences: List[ClassifiedWorkExperience],
    current_date: date | None = None,
) -> Mapping[SecondaryJobType, float]:
    """
    Estimate the years of full-time experience per secondary job type.

    Only work experiences whose primary job type is FULL_TIME and that have a
    secondary job type are considered. Each position is turned into a
    [start, end] date interval and a sweep over the interval boundaries
    accumulates, per secondary job type, the number of days during which at
    least one position of that type was open. Overlapping positions of the
    same type are therefore counted once; overlapping positions of different
    types are each credited in full.

    Args:
        classified_work_experiences: The classified work experiences to aggregate.
        current_date: The date used as the end of positions that have no end
            date. Defaults to today's date.

    Returns:
        A mapping from secondary job type to experience in years (days / 365.25,
        rounded to two decimals). Types that accumulate no days are omitted.
    """
    if current_date is None:
        current_date = date.today()

    # Sweep-line events: (date, +1 for a position opening / -1 for closing, type).
    events = []

    for cwe in classified_work_experiences:
        classification = cwe.work_experience_classification
        secondary_job_type = classification.secondary_job_type
        if classification.primary_job_type != PrimaryJobType.FULL_TIME or not secondary_job_type:
            continue

        start = cwe.position.start
        if not start or not start.year:
            continue

        end = cwe.position.end
        if end is not None and not end.year:
            continue

        try:
            # A missing start month/day defaults to the earliest possible date.
            start_date = date(start.year, start.month or 1, start.day or 1)
            if end is None:
                end_date = current_date
            else:
                # A missing end month/day defaults to the latest possible date.
                end_month = end.month or 12
                end_day = end.day or calendar.monthrange(end.year, end_month)[1]
                end_date = date(end.year, end_month, end_day)
        except ValueError:
            # Malformed date components (e.g. day 31 in a 30-day month): skip
            # this position rather than failing the whole estimate.
            continue

        if start_date > end_date:
            continue  # Skip invalid intervals

        events.append((start_date, 1, secondary_job_type))
        events.append((end_date, -1, secondary_job_type))

    # Process openings before closings that fall on the same date.
    events.sort(key=lambda event: (event[0], -event[1]))

    # Count open positions per type instead of keeping a plain set: with a set,
    # the first position of a type to end would deactivate the type even though
    # another position of the same type is still running.
    open_positions = defaultdict(int)
    durations = defaultdict(int)  # in days
    last_date = None

    for event_date, delta, secondary_job_type in events:
        if last_date is not None and event_date > last_date:
            interval_duration = (event_date - last_date).days
            for active_type, open_count in open_positions.items():
                if open_count > 0:
                    durations[active_type] += interval_duration

        open_positions[secondary_job_type] += delta
        last_date = event_date

    # Convert durations from days to years
    return {stype: round(days / 365.25, 2) for stype, days in durations.items()}
197
+
198
+
199
  async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
200
  """
201
  Process a LinkedIn profile and classify its education and work experiences.
 
242
  work_classification.primary_job_type.INTERNSHIP,
243
  work_classification.primary_job_type.EXTRACURRICULAR,
244
  }:
245
+
246
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
247
  ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
248
  profile, position
249
  )
250
  classified_work_experience.investment_banking_classification = ib_classification
251
 
252
+ if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING and not work_classification.primary_job_type == work_classification.primary_job_type.ADVISORY_BOARD_INVESTOR:
253
+ asset_class_task = (
254
+ self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
255
+ profile, position
256
+ )
257
  )
258
  sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
259
  profile, position
 
266
 
267
  classified_work_experiences.append(classified_work_experience)
268
 
269
+
270
+ experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
271
+ total_work_experience = sum(experience_by_job_type.values())
272
  return LinkedinProfileClassificationResults(
273
+ classified_educations=classified_educations, classified_work_experiences=classified_work_experiences, full_time_work_experience_years=total_work_experience, full_time_work_experience_by_secondary=experience_by_job_type
274
  )
275
 
276
 
src/vsp/app/model/linkedin/linkedin_models.py CHANGED
@@ -79,6 +79,7 @@ class Position(StartEndMixin):
79
  company_username: str | None = None
80
  company_url: str | None = None
81
  company_industry: str | None = None
 
82
  company_staff_count_range: str | None = None
83
  title: str | None = None
84
  location: str | None = None
@@ -90,7 +91,6 @@ class Position(StartEndMixin):
90
  class Skill(BaseSchema):
91
  name: str | None = None
92
 
93
-
94
  class Course(BaseSchema):
95
  name: str | None = None
96
  number: str | None = None
@@ -127,7 +127,7 @@ class LinkedinProfile(BaseSchema):
127
  languages: List[Language] | None = []
128
  educations: List[Education] = []
129
  positions: List[Position] = Field(default=[], alias="position")
130
- full_positions: List[Position] = Field(default=[], alias="position")
131
  skills: List[Skill] | None = []
132
  courses: List[Course] | None = []
133
  certifications: List[Certification] | None = []
@@ -141,8 +141,5 @@ class LinkedinProfile(BaseSchema):
141
  :return: A Profile instance created from the given JSON data.
142
  """
143
  profile = LinkedinProfile.model_validate(json)
144
- if (
145
- profile.full_positions is not None and profile.positions is not None
146
- ): # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
147
- profile.positions = profile.full_positions
148
  return profile
 
79
  company_username: str | None = None
80
  company_url: str | None = None
81
  company_industry: str | None = None
82
+ company_description: str | None = None
83
  company_staff_count_range: str | None = None
84
  title: str | None = None
85
  location: str | None = None
 
91
  class Skill(BaseSchema):
92
  name: str | None = None
93
 
 
94
  class Course(BaseSchema):
95
  name: str | None = None
96
  number: str | None = None
 
127
  languages: List[Language] | None = []
128
  educations: List[Education] = []
129
  positions: List[Position] = Field(default=[], alias="position")
130
+ full_positions: List[Position] = Field(default=[])
131
  skills: List[Skill] | None = []
132
  courses: List[Course] | None = []
133
  certifications: List[Certification] | None = []
 
141
  :return: A Profile instance created from the given JSON data.
142
  """
143
  profile = LinkedinProfile.model_validate(json)
144
+ profile.positions = profile.full_positions
 
 
 
145
  return profile
src/vsp/app/prompts/education_classifier/1 - education_classifier_human.txt CHANGED
@@ -6,4 +6,4 @@ Full Resume:
6
  Specific Linkedin Education Item:
7
  {education}
8
 
9
- Provide your classification, confidence level (0.0 to 1.0), and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the Linkedin education item that support your decision.
 
6
  Specific Linkedin Education Item:
7
  {education}
8
 
9
+ Ensure your reasoning refers to specific details from both the resume and the Linkedin education item that support your decision.
src/vsp/app/prompts/education_classifier/1 - education_classifier_system.txt CHANGED
@@ -15,7 +15,6 @@ Pay close attention to the degree type, field of study, and any other relevant i
15
  Provide your response in the following format:
16
 
17
  output: [CATEGORY_NAME]
18
- confidence: [0.0 to 1.0]
19
  reasoning: [Your explanation here]
20
 
21
  Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
 
15
  Provide your response in the following format:
16
 
17
  output: [CATEGORY_NAME]
 
18
  reasoning: [Your explanation here]
19
 
20
  Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly two lines.
src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt CHANGED
@@ -6,4 +6,4 @@ Full Resume:
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
- Provide your classification for both Primary Job Type and Secondary Job Type, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
 
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
+ Provide your classification for both Primary Job Type and Secondary Job Type and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt CHANGED
@@ -4,7 +4,7 @@ Use the provided information carefully to make accurate classifications. Pay clo
4
 
5
  Primary Job Type categories:
6
  1. FULL_TIME: Regular, ongoing employment
7
- 2. ADVISORY_BOARD_INVESTOR: Advisory roles, board memberships, or independent investing activities
8
  3. INTERNSHIP: Short-term positions for students or recent graduates, including summer/seasonal analyst roles
9
  4. EXTRACURRICULAR: Unpaid activities related to career development, such as student clubs or competitions
10
  5. EDUCATION: When educational experiences are listed as work experiences
@@ -12,13 +12,15 @@ Primary Job Type categories:
12
 
13
  Some tips for primary job type categories:
14
  - Primary job categories are used to describe the hours and compensation setup of the work experience, as opposed to the industry of focus.
15
- - It cannot be ENTREPRENEUR_FOUNDER: Founding or co-founding a company, as this is a secondary job type category.
16
  - If the role is a 2-4 month job that happened in the summer, it's likely an INTERNSHIP. Something that's longer than 6 months is unlikely to be an internship unless it meets the other criteria below.
17
  - If the role is a part-time job while the candidate was in school, it's likely an INTERNSHIP.
18
  - If the role is before the first full-time job, it's probably during school and should be classified as INTERNSHIP.
19
  - If the role explicitly says "intern", "internship", or "externship", 'summer analyst', 'summer associate', 'winter analyst', or 'winter associate' it's almost certainly an INTERNSHIP.
20
  - If the role is a student club, a competition, or something related to the college of the job candidate, it's likely EXTRACURRICULAR.
21
  - If the role's company is a fraternity, a sorority, or a business fraternity, it's likely EXTRACURRICULAR.
 
 
 
22
 
23
  Secondary Job Type categories:
24
  1. INVESTING: Professional investing roles, such as venture capital, private equity, or hedge fund positions (Note: Roles in private equity should be classified as INVESTING)
@@ -28,7 +30,7 @@ Secondary Job Type categories:
28
  5. ENGINEERING: Software development, hardware engineering, or other technical roles
29
  6. ENTREPRENEUR_FOUNDER: Founding or co-founding a company
30
  7. CORPDEV_STRATEGY: Corporate development or strategic planning roles
31
- 8. OTHER: Any role that doesn't fit the above categories
32
 
33
  Some tips for secondary job type categories:
34
  - If the role involves analyzing potential investments, it's likely INVESTING.
@@ -38,7 +40,6 @@ Some tips for secondary job type categories:
38
  Provide your response in the following format exactly:
39
 
40
  reasoning: [Your explanation here]
41
- confidence: [0.0 to 1.0]
42
  primary_job_type: [one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER]
43
  secondary_job_type: [one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER]
44
 
@@ -47,4 +48,4 @@ Ensure each part of your response is on a separate line, exactly as shown above.
47
  The PRIMARY_JOB_TYPE must be one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER
48
  The SECONDARY_JOB_TYPE must be one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER
49
 
50
- Your confidence level should reflect how certain you are about your classification based on the information provided. In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
 
4
 
5
  Primary Job Type categories:
6
  1. FULL_TIME: Regular, ongoing employment
7
+ 2. ADVISORY_BOARD_INVESTOR: When advisory roles, board memberships, or investor status in a specific company are listed as work experience
8
  3. INTERNSHIP: Short-term positions for students or recent graduates, including summer/seasonal analyst roles
9
  4. EXTRACURRICULAR: Unpaid activities related to career development, such as student clubs or competitions
10
  5. EDUCATION: When educational experiences are listed as work experiences
 
12
 
13
  Some tips for primary job type categories:
14
  - Primary job categories are used to describe the hours and compensation setup of the work experience, as opposed to the industry of focus.
 
15
  - If the role is a 2-4 month job that happened in the summer, it's likely an INTERNSHIP. Something that's longer than 6 months is unlikely to be an internship unless it meets the other criteria below.
16
  - If the role is a part-time job while the candidate was in school, it's likely an INTERNSHIP.
17
  - If the role is before the first full-time job, it's probably during school and should be classified as INTERNSHIP.
18
  - If the role explicitly says "intern", "internship", "externship", "summer analyst", "summer associate", "winter analyst", or "winter associate", it's almost certainly an INTERNSHIP.
19
  - If the role is a student club, a competition, or something related to the college of the job candidate, it's likely EXTRACURRICULAR.
20
  - If the role's company is a fraternity, a sorority, or a business fraternity, it's likely EXTRACURRICULAR.
21
+ - If the job title is "Investor", but it overlaps with other full-time employment, and the company description sounds like an operating company rather than an investment firm, it's likely ADVISORY_BOARD_INVESTOR.
22
+ - e.g., Someone says they were an "Investor" at OpenAI in 2023 but their resume indicates they had full-time employment at Sequoia Capital during this time, then they are saying that they invested in OpenAI while they were at Sequoia, not that they were an investor on behalf of OpenAI, and thus the classification for the "Investor at OpenAI" role should be ADVISORY_BOARD_INVESTOR.
23
+ - In general, if the company's description sounds more like an operating company rather than an investment firm/fund and the title is "Investor", there's a high chance the correct classification is ADVISORY_BOARD_INVESTOR.
24
 
25
  Secondary Job Type categories:
26
  1. INVESTING: Professional investing roles, such as venture capital, private equity, or hedge fund positions (Note: Roles in private equity should be classified as INVESTING)
 
30
  5. ENGINEERING: Software development, hardware engineering, or other technical roles
31
  6. ENTREPRENEUR_FOUNDER: Founding or co-founding a company
32
  7. CORPDEV_STRATEGY: Corporate development or strategic planning roles
33
+ 8. OTHER: Any role that doesn't fit the above categories (e.g., corporate finance)
34
 
35
  Some tips for secondary job type categories:
36
  - If the role involves analyzing potential investments, it's likely INVESTING.
 
40
  Provide your response in the following format exactly:
41
 
42
  reasoning: [Your explanation here]
 
43
  primary_job_type: [one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER]
44
  secondary_job_type: [one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER]
45
 
 
48
  The PRIMARY_JOB_TYPE must be one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER
49
  The SECONDARY_JOB_TYPE must be one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER
50
 
51
+ In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt CHANGED
@@ -6,7 +6,7 @@ Full Resume:
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
- Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
10
 
11
- NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
12
 
 
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
+ Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
10
 
11
+ NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, set your classification to INSUFFICIENT_INFORMATION.
12
 
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt CHANGED
@@ -15,19 +15,22 @@ Investing Focus / Asset Class categories:
15
  10. CREDIT: Credit investments
16
  11. SECONDARIES: Secondary market investments
17
  12. OTHER: Any focus or asset class that doesn't fit the above categories
18
- 13. UNCLASSIFIABLE: If the information provided is insufficient to make a classification
 
 
 
 
 
 
19
 
20
  Provide your response in the following format exactly:
21
 
22
  investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
23
  other_description: [Only if OTHER is selected, provide a brief description]
24
- confidence: [0.0 to 1.0]
25
  reasoning: [Your explanation here]
26
 
27
  Ensure each part of your response is on a separate line, exactly as shown above. There should be only three or four lines (depending on whether OTHER is selected).
28
 
29
- The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER.
30
-
31
- Your confidence level should reflect how certain you are about your classification based on the information provided.
32
 
33
  In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
 
15
  10. CREDIT: Credit investments
16
  11. SECONDARIES: Secondary market investments
17
  12. OTHER: Any focus or asset class that doesn't fit the above categories
18
+ 13. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification
19
+
20
+
21
+ Hints:
22
+ - If they worked for a firm that invests in multiple of these categories, use whichever context clues that you can to select which category applies best.
23
+ - For example, if they work for a firm that does both venture capital and real estate investments and they do not provide a description beyond "associate" for their current role, but their prior experience is all in real estate, you may assume this position is in real estate as well.
24
+ - If they work for a firm that does multistage VC, but they don't specify which stage they invested in, you can simply answer "MULTI_STAGE_VC" rather than "INSUFFICIENT_INFORMATION"
25
 
26
  Provide your response in the following format exactly:
27
 
28
  investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
29
  other_description: [Only if OTHER is selected, provide a brief description]
 
30
  reasoning: [Your explanation here]
31
 
32
  Ensure each part of your response is on a separate line, exactly as shown above. There should be only two or three lines (depending on whether OTHER is selected).
33
 
34
+ The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER, INSUFFICIENT_INFORMATION.
 
 
35
 
36
  In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt CHANGED
@@ -6,6 +6,6 @@ Full Resume:
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
- Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision.
10
 
11
- NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
 
6
  Specific Work Experience Item:
7
  {work_experience}
8
 
9
+ Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision.
10
 
11
+ NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, ensure that your classification is INSUFFICIENT_INFORMATION.
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt CHANGED
@@ -15,16 +15,20 @@ Investing Focus Sector categories:
15
  10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
16
  11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
17
  12. OTHER: Any focus that doesn't fit the above categories
18
- 13. UNCLASSIFIABLE: If the information provided is insufficient to make a classification.
 
 
 
 
 
 
 
19
 
20
  Provide your response in the following format exactly:
21
 
22
  investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
23
- confidence: [0.0 to 1.0]
24
  reasoning: [Your explanation here]
25
 
26
  Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
27
 
28
- The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER, UNCLASSIFIABLE.
29
-
30
- Your confidence level should reflect how certain you are about your classification based on the information provided. In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
 
15
  10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
16
  11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
17
  12. OTHER: Any focus that doesn't fit the above categories
18
+ 13. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification.
19
+
20
+ Hints:
21
+ - The firm's focus will often give you a strong hint about what sector they covered.
22
+ - For example, if the firm focuses on CONSUMER_RETAIL and not other sectors, you can safely assume this person focused on CONSUMER_RETAIL.
23
+ - However, if the firm is generalist, that doesn't necessarily mean this work experience is generalist; they may have covered some specific area for the firm.
24
+ - Example: A VC firm is generalist, but the work experience description shows that this person was specifically on a fintech team; classify by that team's sector (fintech falls under FINANCIAL_INSTITUTIONS).
25
+ - Or they *could* have been on a specific team (not enough info to know), in which case your answer would be INSUFFICIENT_INFORMATION
26
 
27
  Provide your response in the following format exactly:
28
 
29
  investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
 
30
  reasoning: [Your explanation here]
31
 
32
  Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly two lines.
33
 
34
+ The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER, INSUFFICIENT_INFORMATION.
 
 
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt CHANGED
@@ -6,4 +6,4 @@ Full Resume:
6
  Specific Investment Banking Work Experience Item:
7
  {work_experience}
8
 
9
- Provide your best guess on the investment banking group, your confidence level (0.0 to 1.0), and your reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
 
6
  Specific Investment Banking Work Experience Item:
7
  {work_experience}
8
 
9
+ Provide your best guess on the investment banking group and your reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt CHANGED
@@ -23,15 +23,13 @@ Investment banking groups:
23
  18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions and banking transactions.
24
  19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
25
  20. OTHER: Any group that doesn't fit the above categories.
26
- 21. UNCLASSIFIABLE: If the information provided is insufficient to make a classification
27
 
28
  Provide your response in the following format exactly:
29
 
30
  investment_banking_group: [One of the twenty investment banking groups listed above]
31
- confidence: [0.0 to 1.0]
32
  reasoning: [Your explanation here]
33
 
34
  Ensure each part of your response is on a separate line, exactly as shown above. There should be only three lines.
35
- Your confidence level should reflect how certain you are about your classification based on the information provided.
36
 
37
  In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
 
23
  18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions and banking transactions.
24
  19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
25
  20. OTHER: Any group that doesn't fit the above categories.
26
+ 21. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification
27
 
28
  Provide your response in the following format exactly:
29
 
30
  investment_banking_group: [One of the twenty-one categories listed above]
 
31
  reasoning: [Your explanation here]
32
 
33
  Ensure each part of your response is on a separate line, exactly as shown above. There should be only two lines.
 
34
 
35
  In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
src/vsp/app/scrapers/linkedin_downloader.py CHANGED
@@ -140,6 +140,43 @@ class LinkedinDownloader:
140
  headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
141
  return (headers, querystring)
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  @retry(
144
  wait=wait_random_exponential(min=1, max=60),
145
  stop=stop_after_attempt(3),
@@ -170,7 +207,30 @@ class LinkedinDownloader:
170
  async with session.get(self._URL, headers=headers, params=querystring) as response:
171
  if response.status == 200:
172
  data = await response.json()
173
- return LinkedinProfile.profile_from_json(data)
174
  else:
175
  logger.error("Failed to fetch Linkedin profile", url=linkedin_url, status=response.status)
176
  raise LinkedinFetchFailedError(f"Failed to fetch Linkedin profile for {linkedin_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
141
  return (headers, querystring)
142
 
143
+
144
+ def _compose_company_info_request(self, company_username: str) -> tuple[dict[str, str], dict[str, str]]:
145
+ querystring = {"username": company_username}
146
+ headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
147
+ return (headers, querystring)
148
+
149
+
150
+ async def augment_company_descriptions(self, session: aiohttp.ClientSession, profile: LinkedinProfile):
151
+ new_positions = []
152
+ for position in profile.positions:
153
+ company_username = position.company_username
154
+ if not company_username:
155
+ new_positions.append(position)
156
+ continue
157
+
158
+ headers, querystring = self._compose_company_info_request(company_username)
159
+ async with session.get(self._URL + "get-company-details", headers=headers, params=querystring) as response:
160
+ if response.status == 200:
161
+ data = await response.json()
162
+ description = data['data'].get('description')
163
+ tagline = data['data'].get('tagline')
164
+
165
+ result = []
166
+ if tagline:
167
+ result.append(tagline)
168
+ if description:
169
+ result.append(description)
170
+
171
+ company_description = '\n\n'.join(result)
172
+ # Create a new Position instance with the updated company_description
173
+ position = position.model_copy(update={"company_description": company_description})
174
+ else:
175
+ logger.error("Failed to fetch Linkedin company profile", company_username=company_username, status=response.status)
176
+ new_positions.append(position)
177
+ # Update the positions list with the new positions
178
+ profile.positions = new_positions
179
+
180
  @retry(
181
  wait=wait_random_exponential(min=1, max=60),
182
  stop=stop_after_attempt(3),
 
207
  async with session.get(self._URL, headers=headers, params=querystring) as response:
208
  if response.status == 200:
209
  data = await response.json()
210
+ profile = LinkedinProfile.profile_from_json(data)
211
  else:
212
  logger.error("Failed to fetch Linkedin profile", url=linkedin_url, status=response.status)
213
  raise LinkedinFetchFailedError(f"Failed to fetch Linkedin profile for {linkedin_url}")
214
+ await self.augment_company_descriptions(session, profile)
215
+ return profile
216
+
217
+
218
+
219
+ async def main():
220
+ # Initialize the LinkedinDownloader
221
+ downloader = LinkedinDownloader()
222
+
223
+ # LinkedIn profile URL to fetch
224
+ linkedin_url = "https://www.linkedin.com/in/lauren-hipple-84277373/"
225
+
226
+ try:
227
+ # Fetch the LinkedIn profile data
228
+ profile = await downloader.fetch_linkedin_data(linkedin_url)
229
+
230
+ except LinkedinFetchFailedError as e:
231
+ print(f"Error: {str(e)}")
232
+ except Exception as e:
233
+ print(f"An unexpected error occurred: {str(e)}")
234
+
235
+ if __name__ == "__main__":
236
+ asyncio.run(main())
tests/vsp/app/test_main.py DELETED
@@ -1,206 +0,0 @@
1
- from unittest.mock import AsyncMock, patch
2
-
3
- import pytest
4
-
5
- from vsp.app.classifiers.education_classifier import EducationClassification, SchoolType
6
- from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
7
- PrimaryJobType,
8
- SecondaryJobType,
9
- WorkExperienceClassification,
10
- )
11
- from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
12
- InvestingFocusAssetClass,
13
- InvestingFocusAssetClassClassification,
14
- )
15
- from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
16
- InvestingFocusSector,
17
- InvestingFocusSectorClassification,
18
- )
19
- from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
20
- InvestmentBankingGroup,
21
- InvestmentBankingGroupClassification,
22
- )
23
- from vsp.app.main import LinkedinProfileClassificationResults, process_linkedin_profile
24
- from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
25
-
26
-
27
- @pytest.fixture
28
- def sample_linkedin_profile():
29
- return LinkedinProfile(
30
- first_name="John",
31
- last_name="Doe",
32
- educations=[
33
- Education(
34
- school_name="Test University",
35
- degree="MBA",
36
- field_of_study="Business",
37
- )
38
- ],
39
- position=[
40
- Position(
41
- title="Investment Banking Analyst",
42
- company_name="Bank Corp",
43
- ),
44
- Position(
45
- title="Investment Associate",
46
- company_name="VC Firm",
47
- ),
48
- Position(
49
- title="Software Engineer",
50
- company_name="Tech Corp",
51
- ),
52
- ],
53
- )
54
-
55
-
56
- @pytest.mark.asyncio
57
- async def test_process_linkedin_profile_comprehensive(sample_linkedin_profile):
58
- with (
59
- patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
60
- patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
61
- patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
62
- patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
63
- patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
64
- ):
65
- mock_education_classifier.return_value.classify_education = AsyncMock(
66
- return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
67
- )
68
- mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
69
- side_effect=[
70
- WorkExperienceClassification(
71
- primary_job_type=PrimaryJobType.FULL_TIME,
72
- secondary_job_type=SecondaryJobType.INVESTMENT_BANKING,
73
- confidence=1.0,
74
- reasoning="Test",
75
- ),
76
- WorkExperienceClassification(
77
- primary_job_type=PrimaryJobType.FULL_TIME,
78
- secondary_job_type=SecondaryJobType.INVESTING,
79
- confidence=1.0,
80
- reasoning="Test",
81
- ),
82
- WorkExperienceClassification(
83
- primary_job_type=PrimaryJobType.FULL_TIME,
84
- secondary_job_type=SecondaryJobType.ENGINEERING,
85
- confidence=1.0,
86
- reasoning="Test",
87
- ),
88
- ]
89
- )
90
- mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock(
91
- return_value=InvestmentBankingGroupClassification(
92
- investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
93
- )
94
- )
95
- mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class = AsyncMock(
96
- return_value=InvestingFocusAssetClassClassification(
97
- investing_focus_asset_class=InvestingFocusAssetClass.EARLY_STAGE_VC,
98
- confidence=1.0,
99
- reasoning="Test",
100
- )
101
- )
102
- mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector = AsyncMock(
103
- return_value=InvestingFocusSectorClassification(
104
- investing_focus_sector=InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT,
105
- confidence=1.0,
106
- reasoning="Test",
107
- )
108
- )
109
-
110
- result = await process_linkedin_profile(sample_linkedin_profile)
111
-
112
- assert isinstance(result, LinkedinProfileClassificationResults)
113
- assert len(result.classified_educations) == 1
114
- assert len(result.classified_work_experiences) == 3
115
-
116
- # Check investment banking position
117
- ib_experience = result.classified_work_experiences[0]
118
- assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
119
- assert ib_experience.investment_banking_classification is not None
120
- assert (
121
- ib_experience.investment_banking_classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
122
- )
123
- assert ib_experience.investing_focus_asset_class_classification is None
124
- assert ib_experience.investing_focus_sector_classification is None
125
-
126
- # Check investing position
127
- investing_experience = result.classified_work_experiences[1]
128
- assert investing_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTING
129
- assert investing_experience.investment_banking_classification is None
130
- assert investing_experience.investing_focus_asset_class_classification is not None
131
- assert (
132
- investing_experience.investing_focus_asset_class_classification.investing_focus_asset_class
133
- == InvestingFocusAssetClass.EARLY_STAGE_VC
134
- )
135
- assert investing_experience.investing_focus_sector_classification is not None
136
- assert (
137
- investing_experience.investing_focus_sector_classification.investing_focus_sector
138
- == InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT
139
- )
140
-
141
- # Check engineering position
142
- eng_experience = result.classified_work_experiences[2]
143
- assert eng_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
144
- assert eng_experience.investment_banking_classification is None
145
- assert eng_experience.investing_focus_asset_class_classification is None
146
- assert eng_experience.investing_focus_sector_classification is None
147
-
148
- # Check that the classifiers were called the correct number of times
149
- assert mock_education_classifier.return_value.classify_education.call_count == 1
150
- assert mock_work_experience_classifier.return_value.classify_work_experience.call_count == 3
151
- assert mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.call_count == 1
152
- assert (
153
- mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.call_count
154
- == 1
155
- )
156
- assert mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.call_count == 1
157
-
158
-
159
- @pytest.mark.asyncio
160
- async def test_process_linkedin_profile_no_investing(sample_linkedin_profile):
161
- sample_linkedin_profile.positions = [
162
- Position(
163
- title="Software Engineer",
164
- company_name="Tech Corp",
165
- )
166
- ]
167
-
168
- with (
169
- patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
170
- patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
171
- patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
172
- patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
173
- patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
174
- ):
175
- mock_education_classifier.return_value.classify_education = AsyncMock(
176
- return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
177
- )
178
- mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
179
- return_value=WorkExperienceClassification(
180
- primary_job_type=PrimaryJobType.FULL_TIME,
181
- secondary_job_type=SecondaryJobType.ENGINEERING,
182
- confidence=1.0,
183
- reasoning="Test",
184
- )
185
- )
186
-
187
- result = await process_linkedin_profile(sample_linkedin_profile)
188
-
189
- assert isinstance(result, LinkedinProfileClassificationResults)
190
- assert len(result.classified_educations) == 1
191
- assert len(result.classified_work_experiences) == 1
192
-
193
- work_experience = result.classified_work_experiences[0]
194
- assert work_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
195
- assert work_experience.investment_banking_classification is None
196
- assert work_experience.investing_focus_asset_class_classification is None
197
- assert work_experience.investing_focus_sector_classification is None
198
-
199
- # ensure investment banking, investing focus asset class, and investing focus sector classifiers were not called
200
- mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
201
- mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.assert_not_called()
202
- mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.assert_not_called()
203
-
204
-
205
- if __name__ == "__main__":
206
- pytest.main([__file__])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uv.lock CHANGED
The diff for this file is too large to render. See raw diff