fixed a lot of stuff to meet spencer's requirements
Browse files- pyproject.toml +1 -0
- src/notebooks/classifiers/education_classifier.ipynb +28 -28
- src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb +44 -5
- src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb +62 -5
- src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb +58 -5
- src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb +90 -5
- src/vsp/app/1st_gradio.py +111 -0
- src/vsp/app/bindings.py +4 -1
- src/vsp/app/classifiers/education_classifier.py +0 -3
- src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py +2 -9
- src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py +5 -11
- src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py +1 -11
- src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +1 -10
- src/vsp/app/main.py +98 -5
- src/vsp/app/model/linkedin/linkedin_models.py +3 -6
- src/vsp/app/prompts/education_classifier/1 - education_classifier_human.txt +1 -1
- src/vsp/app/prompts/education_classifier/1 - education_classifier_system.txt +0 -1
- src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt +1 -1
- src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt +6 -5
- src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt +2 -2
- src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt +8 -5
- src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt +2 -2
- src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt +9 -5
- src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt +1 -1
- src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt +1 -3
- src/vsp/app/scrapers/linkedin_downloader.py +61 -1
- tests/vsp/app/test_main.py +0 -206
- uv.lock +0 -0
pyproject.toml
CHANGED
|
@@ -6,6 +6,7 @@ requires-python = ">=3.12"
|
|
| 6 |
dependencies = [
|
| 7 |
"aiohttp>=3.10.5",
|
| 8 |
"boto3>=1.35.12",
|
|
|
|
| 9 |
"ipykernel>=6.29.5",
|
| 10 |
"openai>=1.43.0",
|
| 11 |
"pydantic>=2.8.2",
|
|
|
|
| 6 |
dependencies = [
|
| 7 |
"aiohttp>=3.10.5",
|
| 8 |
"boto3>=1.35.12",
|
| 9 |
+
"gradio>=4.44.0",
|
| 10 |
"ipykernel>=6.29.5",
|
| 11 |
"openai>=1.43.0",
|
| 12 |
"pydantic>=2.8.2",
|
src/notebooks/classifiers/education_classifier.ipynb
CHANGED
|
@@ -2,14 +2,14 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [
|
| 8 |
{
|
| 9 |
"name": "stdout",
|
| 10 |
"output_type": "stream",
|
| 11 |
"text": [
|
| 12 |
-
"/
|
| 13 |
]
|
| 14 |
}
|
| 15 |
],
|
|
@@ -30,7 +30,7 @@
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
-
"with open(\"../tests/test_data/hansae_catlett.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
@@ -39,50 +39,50 @@
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
-
"execution_count":
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [
|
| 45 |
{
|
| 46 |
"name": "stdout",
|
| 47 |
"output_type": "stream",
|
| 48 |
"text": [
|
| 49 |
-
"\u001b[2m2024-09-
|
| 50 |
-
"\u001b[2m2024-09-
|
| 51 |
-
"\u001b[2m2024-09-
|
| 52 |
-
"\u001b[2m2024-09-
|
| 53 |
-
"\u001b[2m2024-09-
|
| 54 |
-
"\u001b[2m2024-09-
|
| 55 |
-
"\u001b[2m2024-09-
|
| 56 |
-
"\u001b[2m2024-09-
|
| 57 |
-
"\u001b[2m2024-09-
|
| 58 |
-
"\u001b[2m2024-09-
|
| 59 |
-
"\u001b[2m2024-09-
|
| 60 |
-
"\u001b[2m2024-09-
|
| 61 |
-
"\u001b[2m2024-09-
|
| 62 |
-
"\u001b[2m2024-09-
|
| 63 |
-
"\u001b[2m2024-09-
|
| 64 |
-
"\u001b[2m2024-09-
|
| 65 |
-
"\u001b[2m2024-09-
|
| 66 |
-
"\u001b[2m2024-09-
|
| 67 |
"{\n",
|
| 68 |
" \"output\": \"Graduate School\",\n",
|
| 69 |
" \"confidence\": 1.0,\n",
|
| 70 |
-
" \"reasoning\": \"The specific Linkedin education item
|
| 71 |
"}\n",
|
| 72 |
"{\n",
|
| 73 |
" \"output\": \"Undergraduate (Incomplete)\",\n",
|
| 74 |
" \"confidence\": 0.9,\n",
|
| 75 |
-
" \"reasoning\": \"The specific Linkedin education item indicates
|
| 76 |
"}\n",
|
| 77 |
"{\n",
|
| 78 |
-
" \"output\": \"
|
| 79 |
" \"confidence\": 1.0,\n",
|
| 80 |
-
" \"reasoning\": \"The specific Linkedin education item
|
| 81 |
"}\n",
|
| 82 |
"{\n",
|
| 83 |
-
" \"output\": \"
|
| 84 |
" \"confidence\": 1.0,\n",
|
| 85 |
-
" \"reasoning\": \"The specific Linkedin education item clearly states that the candidate
|
| 86 |
"}\n"
|
| 87 |
]
|
| 88 |
}
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [
|
| 8 |
{
|
| 9 |
"name": "stdout",
|
| 10 |
"output_type": "stream",
|
| 11 |
"text": [
|
| 12 |
+
"/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
|
| 13 |
]
|
| 14 |
}
|
| 15 |
],
|
|
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
+
"with open(\"../tests/test_data/sample_profiles/hansae_catlett.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 4,
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [
|
| 45 |
{
|
| 46 |
"name": "stdout",
|
| 47 |
"output_type": "stream",
|
| 48 |
"text": [
|
| 49 |
+
"\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-16 15:17:08\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-16 15:17:09\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 59 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 60 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1100\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
|
| 61 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 62 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1057\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m103\u001b[0m\n",
|
| 63 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 64 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1108\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
|
| 65 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 66 |
+
"\u001b[2m2024-09-16 15:17:11\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1146\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
|
| 67 |
"{\n",
|
| 68 |
" \"output\": \"Graduate School\",\n",
|
| 69 |
" \"confidence\": 1.0,\n",
|
| 70 |
+
" \"reasoning\": \"The specific Linkedin education item indicates that the candidate earned a Master of Public Policy (M.P.P.) from Stanford University, which aligns with the completed graduate degree listed in the full resume. This classification fits the GRAD_SCHOOL category as it is a Master's degree in a field other than Business Administration or Law. The details provided confirm the completion of this degree during the specified period.\"\n",
|
| 71 |
"}\n",
|
| 72 |
"{\n",
|
| 73 |
" \"output\": \"Undergraduate (Incomplete)\",\n",
|
| 74 |
" \"confidence\": 0.9,\n",
|
| 75 |
+
" \"reasoning\": \"The specific Linkedin education item indicates a \\\"Study Abroad\\\" program at the University of New South Wales, which aligns with the incomplete undergraduate studies mentioned in the resume. The resume lists a completed undergraduate degree from Harvard University, but the study abroad experience does not constitute a completed degree itself, thus fitting the category of UNDERGRAD_INCOMPLETE. The confidence is high due to the clear distinction between completed and incomplete educational experiences.\"\n",
|
| 76 |
"}\n",
|
| 77 |
"{\n",
|
| 78 |
+
" \"output\": \"Undergraduate (Completed)\",\n",
|
| 79 |
" \"confidence\": 1.0,\n",
|
| 80 |
+
" \"reasoning\": \"The specific Linkedin education item details a Bachelor of Arts degree in Biomedical Engineering and Philosophy from Harvard University, which aligns perfectly with the information provided in the full resume. The resume confirms the completion of this undergraduate degree, as it states the same degree and institution, along with the graduation period from 2007 to 2011. Therefore, it is classified as a completed undergraduate degree.\"\n",
|
| 81 |
"}\n",
|
| 82 |
"{\n",
|
| 83 |
+
" \"output\": \"MBA\",\n",
|
| 84 |
" \"confidence\": 1.0,\n",
|
| 85 |
+
" \"reasoning\": \"The specific Linkedin education item clearly states that the candidate obtained a Master of Business Administration (M.B.A.) from Stanford University Graduate School of Business between 2016 and 2019. This aligns perfectly with the information in the full resume, which also lists the same degree and institution. The high confidence level is due to the direct match in degree type and institution, confirming the classification as an MBA.\"\n",
|
| 86 |
"}\n"
|
| 87 |
]
|
| 88 |
}
|
src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb
CHANGED
|
@@ -2,9 +2,17 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"import os\n",
|
|
@@ -22,7 +30,7 @@
|
|
| 22 |
"print(os.getcwd())\n",
|
| 23 |
"\n",
|
| 24 |
"\n",
|
| 25 |
-
"with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
|
| 26 |
" data = json.load(f)\n",
|
| 27 |
" # convert to linkedin profile\n",
|
| 28 |
"\n",
|
|
@@ -31,9 +39,40 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
-
"execution_count":
|
| 35 |
"metadata": {},
|
| 36 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"source": [
|
| 38 |
"import asyncio\n",
|
| 39 |
"\n",
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
"source": [
|
| 17 |
"import json\n",
|
| 18 |
"import os\n",
|
|
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
+
"with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 3,
|
| 43 |
"metadata": {},
|
| 44 |
+
"outputs": [
|
| 45 |
+
{
|
| 46 |
+
"name": "stdout",
|
| 47 |
+
"output_type": "stream",
|
| 48 |
+
"text": [
|
| 49 |
+
"\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 53 |
+
"{\n",
|
| 54 |
+
" \"investment_banking_group\": \"Private Funds\",\n",
|
| 55 |
+
" \"confidence\": 0.9,\n",
|
| 56 |
+
" \"reasoning\": \"The candidate's role as an Investment Professional at Accel-KKR, a private equity firm focused on technology investments, aligns closely with the PRIVATE_FUNDS group. Their work involves acquisitions, buyouts, and structured investments, which are typical activities in private equity. Additionally, the candidate's experience at Fidelity Investments in corporate finance suggests a strong background in financial transactions, further supporting this classification.\"\n",
|
| 57 |
+
"}\n",
|
| 58 |
+
"{\n",
|
| 59 |
+
" \"investment_banking_group\": \"Generalist\",\n",
|
| 60 |
+
" \"confidence\": 0.8,\n",
|
| 61 |
+
" \"reasoning\": \"The candidate worked in the Investment Banking division at William Blair & Company, a firm known for providing a wide range of financial advisory services, which suggests a generalist role. The absence of specific details in the work experience description indicates that the candidate may have been involved in various transactions across different sectors rather than specializing in a particular area. Additionally, their subsequent role at Accel-KKR, a private equity firm focused on technology, further supports the idea of a generalist background prior to specializing.\"\n",
|
| 62 |
+
"}\n",
|
| 63 |
+
"{\n",
|
| 64 |
+
" \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 65 |
+
" \"confidence\": 0.8,\n",
|
| 66 |
+
" \"reasoning\": \"The candidate worked in the FFAS Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the focus of the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their education in Business Economics, supports this classification, although the lack of specific details in the job description leaves some uncertainty.\"\n",
|
| 67 |
+
"}\n",
|
| 68 |
+
"{\n",
|
| 69 |
+
" \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 70 |
+
" \"confidence\": 0.8,\n",
|
| 71 |
+
" \"reasoning\": \"The candidate worked in the PI Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their background in investment banking and private equity, supports this classification, although the lack of specific details in the work experience description leads to a slightly lower confidence level.\"\n",
|
| 72 |
+
"}\n"
|
| 73 |
+
]
|
| 74 |
+
}
|
| 75 |
+
],
|
| 76 |
"source": [
|
| 77 |
"import asyncio\n",
|
| 78 |
"\n",
|
src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb
CHANGED
|
@@ -2,9 +2,17 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"import os\n",
|
|
@@ -22,7 +30,7 @@
|
|
| 22 |
"print(os.getcwd())\n",
|
| 23 |
"\n",
|
| 24 |
"\n",
|
| 25 |
-
"with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
|
| 26 |
" data = json.load(f)\n",
|
| 27 |
" # convert to linkedin profile\n",
|
| 28 |
"\n",
|
|
@@ -31,9 +39,58 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
-
"execution_count":
|
| 35 |
"metadata": {},
|
| 36 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"source": [
|
| 38 |
"import asyncio\n",
|
| 39 |
"\n",
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
"source": [
|
| 17 |
"import json\n",
|
| 18 |
"import os\n",
|
|
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
+
"with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 4,
|
| 43 |
"metadata": {},
|
| 44 |
+
"outputs": [
|
| 45 |
+
{
|
| 46 |
+
"name": "stdout",
|
| 47 |
+
"output_type": "stream",
|
| 48 |
+
"text": [
|
| 49 |
+
"\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 59 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 60 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
|
| 61 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 62 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1084\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
|
| 63 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 64 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
|
| 65 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 66 |
+
"\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1158\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
|
| 67 |
+
"{\n",
|
| 68 |
+
" \"investing_focus_asset_class\": \"Unclassifiable\",\n",
|
| 69 |
+
" \"confidence\": 0.3,\n",
|
| 70 |
+
" \"reasoning\": \"The specific work experience at Fidelity Investments as \\\"FFAS Corporate Finance\\\" does not provide enough detail about the nature of the investments or financial activities undertaken during that time. Without a description of the specific focus or asset class related to this role, it is difficult to classify it accurately. The lack of information leads to a low confidence level in making a definitive classification.\",\n",
|
| 71 |
+
" \"other_description\": null\n",
|
| 72 |
+
"}\n",
|
| 73 |
+
"{\n",
|
| 74 |
+
" \"investing_focus_asset_class\": \"Unclassifiable\",\n",
|
| 75 |
+
" \"confidence\": 0.3,\n",
|
| 76 |
+
" \"reasoning\": \"The specific work experience item at Fidelity Investments as \\\"PI Corporate Finance\\\" does not provide any details about the nature of the investments or financial activities undertaken in that role. Without specific information regarding the focus on asset classes or investment strategies, it is difficult to classify this experience accurately. The lack of a description or responsibilities limits the ability to determine a clear investing focus or asset class.\",\n",
|
| 77 |
+
" \"other_description\": null\n",
|
| 78 |
+
"}\n",
|
| 79 |
+
"{\n",
|
| 80 |
+
" \"investing_focus_asset_class\": \"Unclassifiable\",\n",
|
| 81 |
+
" \"confidence\": 0.3,\n",
|
| 82 |
+
" \"reasoning\": \"The specific work experience at William Blair & Company is labeled as \\\"Investment Banking,\\\" but there is no detailed description of the responsibilities or types of transactions involved. Without specific information on whether the focus was on equity, debt, mergers, or acquisitions, it is difficult to classify this experience into one of the predefined investing focus or asset class categories. The lack of detail leads to uncertainty in classification.\",\n",
|
| 83 |
+
" \"other_description\": null\n",
|
| 84 |
+
"}\n",
|
| 85 |
+
"{\n",
|
| 86 |
+
" \"investing_focus_asset_class\": \"Private Equity / Buyouts\",\n",
|
| 87 |
+
" \"confidence\": 0.9,\n",
|
| 88 |
+
" \"reasoning\": \"The specific work experience at Accel-KKR indicates a focus on private equity investments, particularly in technology and software sectors. The description highlights typical transactions such as acquisitions, buyouts of divisions from public companies, and take-private transactions, which are all characteristic of private equity buyouts. This aligns well with the classification of PRIVATE_EQUITY_BUYOUTS. The high confidence level reflects the clear alignment of the job responsibilities with this asset class.\",\n",
|
| 89 |
+
" \"other_description\": null\n",
|
| 90 |
+
"}\n"
|
| 91 |
+
]
|
| 92 |
+
}
|
| 93 |
+
],
|
| 94 |
"source": [
|
| 95 |
"import asyncio\n",
|
| 96 |
"\n",
|
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb
CHANGED
|
@@ -2,9 +2,17 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"import os\n",
|
|
@@ -22,7 +30,7 @@
|
|
| 22 |
"print(os.getcwd())\n",
|
| 23 |
"\n",
|
| 24 |
"\n",
|
| 25 |
-
"with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
|
| 26 |
" data = json.load(f)\n",
|
| 27 |
" # convert to linkedin profile\n",
|
| 28 |
"\n",
|
|
@@ -31,9 +39,54 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
-
"execution_count":
|
| 35 |
"metadata": {},
|
| 36 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"source": [
|
| 38 |
"import asyncio\n",
|
| 39 |
"\n",
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
"source": [
|
| 17 |
"import json\n",
|
| 18 |
"import os\n",
|
|
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
+
"with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 2,
|
| 43 |
"metadata": {},
|
| 44 |
+
"outputs": [
|
| 45 |
+
{
|
| 46 |
+
"name": "stdout",
|
| 47 |
+
"output_type": "stream",
|
| 48 |
+
"text": [
|
| 49 |
+
"\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
|
| 59 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 60 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m88\u001b[0m\n",
|
| 61 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 62 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1080\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
|
| 63 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 64 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
|
| 65 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 66 |
+
"\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1154\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
|
| 67 |
+
"{\n",
|
| 68 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 69 |
+
" \"confidence\": 0.7,\n",
|
| 70 |
+
" \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"FFAS Corporate Finance\\\" suggests involvement in financial analysis and corporate finance activities, which aligns with the financial institutions sector. However, the lack of detailed responsibilities in the description leads to a slightly lower confidence level.\"\n",
|
| 71 |
+
"}\n",
|
| 72 |
+
"{\n",
|
| 73 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 74 |
+
" \"confidence\": 0.8,\n",
|
| 75 |
+
" \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"PI Corporate Finance\\\" suggests involvement in financial activities, likely related to investment management or corporate finance within the financial sector. While the description lacks detail, the context of the company and the role strongly supports classification in the financial institutions sector.\"\n",
|
| 76 |
+
"}\n",
|
| 77 |
+
"{\n",
|
| 78 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 79 |
+
" \"confidence\": 0.8,\n",
|
| 80 |
+
" \"reasoning\": \"The specific work experience item indicates that the candidate worked in Investment Banking at William Blair & Company, which is categorized under Financial Services. This aligns with the focus on financial institutions, as investment banking is a core component of this sector. The absence of a detailed description does not detract significantly from the classification, as the job title and company industry provide clear context.\"\n",
|
| 81 |
+
"}\n",
|
| 82 |
+
"{\n",
|
| 83 |
+
" \"investing_focus_sector\": \"Technology / Software / TMT\",\n",
|
| 84 |
+
" \"confidence\": 0.9,\n",
|
| 85 |
+
" \"reasoning\": \"The specific work experience at Accel-KKR clearly indicates a focus on investing in software and technology-enabled services companies. The description highlights that the firm is dedicated exclusively to this sector, which aligns directly with the TECHNOLOGY_SOFTWARE_TMT category. The confidence level is high due to the explicit mention of the firm's focus area in both the resume and the work experience item.\"\n",
|
| 86 |
+
"}\n"
|
| 87 |
+
]
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
"source": [
|
| 91 |
"import asyncio\n",
|
| 92 |
"\n",
|
src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb
CHANGED
|
@@ -2,9 +2,17 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"import os\n",
|
|
@@ -22,7 +30,7 @@
|
|
| 22 |
"print(os.getcwd())\n",
|
| 23 |
"\n",
|
| 24 |
"\n",
|
| 25 |
-
"with open(\"../tests/test_data/hansae_catlett.json\") as f:\n",
|
| 26 |
" data = json.load(f)\n",
|
| 27 |
" # convert to linkedin profile\n",
|
| 28 |
"\n",
|
|
@@ -31,9 +39,86 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
-
"execution_count":
|
| 35 |
"metadata": {},
|
| 36 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"source": [
|
| 38 |
"import asyncio\n",
|
| 39 |
"\n",
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
"source": [
|
| 17 |
"import json\n",
|
| 18 |
"import os\n",
|
|
|
|
| 30 |
"print(os.getcwd())\n",
|
| 31 |
"\n",
|
| 32 |
"\n",
|
| 33 |
+
"with open(\"../tests/test_data/sample_profiles/hansae_catlett.json\") as f:\n",
|
| 34 |
" data = json.load(f)\n",
|
| 35 |
" # convert to linkedin profile\n",
|
| 36 |
"\n",
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 4,
|
| 43 |
"metadata": {},
|
| 44 |
+
"outputs": [
|
| 45 |
+
{
|
| 46 |
+
"name": "stdout",
|
| 47 |
+
"output_type": "stream",
|
| 48 |
+
"text": [
|
| 49 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
|
| 59 |
+
"{\n",
|
| 60 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 61 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 62 |
+
" \"confidence\": 0.9,\n",
|
| 63 |
+
" \"reasoning\": \"The work experience as a Partner at HOF Capital is a full-time role in a venture capital firm, which aligns with the candidate's ongoing career in investing. Given that the candidate has been involved in venture capital and private equity roles, this position is classified as FULL_TIME. The secondary job type is classified as INVESTING, as the role directly involves investment activities typical of a venture capital firm. This classification is supported by the candidate's previous experience as Vice President at Bessemer Venture Partners and their role as Co-Founder at The MBA Fund, both of which are also in the investing domain.\"\n",
|
| 64 |
+
"}\n",
|
| 65 |
+
"{\n",
|
| 66 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 67 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 68 |
+
" \"confidence\": 0.9,\n",
|
| 69 |
+
" \"reasoning\": \"The work experience at The MBA Fund is classified as FULL_TIME because it is a co-founding role in a venture capital firm, indicating ongoing and regular employment. The responsibilities described involve significant engagement with startups and investment activities, which aligns with a full-time commitment. For the secondary job type, it is classified as INVESTING since the role involves backing and supporting startup founders, which is characteristic of venture capital activities. This is supported by the description of the firm and the candidate's role as a General Partner, which typically involves making investment decisions and managing a portfolio of investments.\"\n",
|
| 70 |
+
"}\n",
|
| 71 |
+
"{\n",
|
| 72 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 73 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 74 |
+
" \"confidence\": 1.0,\n",
|
| 75 |
+
" \"reasoning\": \"The work experience as Vice President at Bessemer Venture Partners is classified as FULL_TIME because it is a full-time position that spans over four years, which aligns with the criteria for regular ongoing employment. The secondary job type is classified as INVESTING, as the role is within a venture capital firm, which involves professional investing activities. This is supported by the candidate's extensive background in venture capital and private equity, as indicated in both the resume and the specific work experience item.\"\n",
|
| 76 |
+
"}\n",
|
| 77 |
+
"{\n",
|
| 78 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 79 |
+
" \"secondary_job_type\": \"Other\",\n",
|
| 80 |
+
" \"confidence\": 0.9,\n",
|
| 81 |
+
" \"reasoning\": \"The work experience as a Board Observer at Rillavoice is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time employment role and does not fit the criteria for an internship, it is classified as OTHER for the primary job type. For the secondary job type, since the role involves oversight and strategic input in a software company, it does not fit into the categories of investing, banking, or consulting, thus it is classified as OTHER as well.\"\n",
|
| 82 |
+
"}\n",
|
| 83 |
+
"{\n",
|
| 84 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 85 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 86 |
+
" \"confidence\": 0.9,\n",
|
| 87 |
+
" \"reasoning\": \"The work experience at Archy is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital as seen in their role as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund. The duration from 2022 to 2024 suggests a longer-term commitment, which is typical for investing roles rather than internships. The primary job type is classified as OTHER since there is no indication of full-time employment or internship status, and it does not fit into the other primary categories.\"\n",
|
| 88 |
+
"}\n",
|
| 89 |
+
"{\n",
|
| 90 |
+
" \"primary_job_type\": \"Advisory / Board / Independent Investor\",\n",
|
| 91 |
+
" \"secondary_job_type\": \"Other\",\n",
|
| 92 |
+
" \"confidence\": 0.9,\n",
|
| 93 |
+
" \"reasoning\": \"The work experience as a Board Observer at MaintainX is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time role and does not indicate a regular employment setup, it does not fit the FULL_TIME category. The duration from 2021 to 2024 suggests a longer-term commitment, but the nature of the role indicates it is more advisory in nature. Therefore, I classify it as ADVISORY_BOARD_INVESTOR for the primary job type. For the secondary job type, since the role involves oversight and strategic input rather than direct investment activities, it does not fit into the INVESTING category. Instead, it aligns more closely with the OTHER category, as it does not fit the other defined roles.\"\n",
|
| 94 |
+
"}\n",
|
| 95 |
+
"{\n",
|
| 96 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 97 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 98 |
+
" \"confidence\": 0.9,\n",
|
| 99 |
+
" \"reasoning\": \"The work experience at Spot AI is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital and investment roles as seen in their resume. The duration from 2021 to 2024 suggests a long-term commitment, which rules out the possibility of it being an internship. The primary job type is classified as FULL_TIME since the role is ongoing and likely compensated, fitting the criteria for regular employment.\"\n",
|
| 100 |
+
"}\n",
|
| 101 |
+
"{\n",
|
| 102 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 103 |
+
" \"secondary_job_type\": \"Entrepreneur / Founder\",\n",
|
| 104 |
+
" \"confidence\": 0.9,\n",
|
| 105 |
+
" \"reasoning\": \"The work experience as a Board Observer at VendorPM is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that the candidate has been involved in this role from 2021 to 2024, it suggests a part-time or advisory capacity rather than a full-time position. Therefore, the Primary Job Type is classified as OTHER, as it does not fit the other categories. For the Secondary Job Type, this role is best classified as ENTREPRENEUR_FOUNDER since it involves oversight and advisory functions in a startup environment, which is common for board observer roles in venture-backed companies.\"\n",
|
| 106 |
+
"}\n",
|
| 107 |
+
"{\n",
|
| 108 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 109 |
+
" \"secondary_job_type\": \"CorpDev / Strategy\",\n",
|
| 110 |
+
" \"confidence\": 0.9,\n",
|
| 111 |
+
" \"reasoning\": \"The work experience as a Board Director at Luxury Presence is a long-term role from 2021 to 2024, indicating a significant commitment that aligns with a full-time position rather than a temporary or part-time role. Given that this position involves governance and oversight, it does not fit the criteria for an internship or extracurricular activity. In terms of secondary job type, the role of Board Director typically involves strategic decision-making and oversight, which aligns with CORPDEV_STRATEGY as it is a corporate governance role rather than a direct investment role.\"\n",
|
| 112 |
+
"}\n",
|
| 113 |
+
"{\n",
|
| 114 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 115 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 116 |
+
" \"confidence\": 0.9,\n",
|
| 117 |
+
" \"reasoning\": \"The work experience at ServiceTitan is classified as an INVESTING role because the title \\\"Investor\\\" indicates a focus on investment activities, which aligns with the candidate's background in venture capital and investment roles as seen in their resume. The candidate has been involved in various investment capacities, including their role as Vice President at Bessemer Venture Partners and as a Co-Founder at The MBA Fund, both of which emphasize their expertise in investing. The duration of the role from 2021 to 2024 suggests a longer-term commitment, which is typical for professional investing roles rather than internships or part-time positions. For the Primary Job Type, since this role is not a full-time position but rather an investment role, it is classified as OTHER, as it does not fit the other primary categories like FULL_TIME or INTERNSHIP.\"\n",
|
| 118 |
+
"}\n"
|
| 119 |
+
]
|
| 120 |
+
}
|
| 121 |
+
],
|
| 122 |
"source": [
|
| 123 |
"import asyncio\n",
|
| 124 |
"\n",
|
src/vsp/app/1st_gradio.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import nest_asyncio
|
| 3 |
+
|
| 4 |
+
# Apply nest_asyncio to allow nested event loops
|
| 5 |
+
nest_asyncio.apply()
|
| 6 |
+
|
| 7 |
+
# Import your custom modules
|
| 8 |
+
from vsp.app.scrapers.linkedin_downloader import LinkedinDownloader
|
| 9 |
+
from vsp.app.main import VspDataEnrichment
|
| 10 |
+
|
| 11 |
+
async def process_profile(profile_linkedin):
    """Download a LinkedIn profile and run it through the enrichment pipeline.

    Args:
        profile_linkedin: Profile URL, forwarded to the downloader as
            ``linkedin_url``.

    Returns:
        The result of ``VspDataEnrichment.process_linkedin_profile`` for the
        downloaded profile.
    """
    # Both collaborators are created up front, before any network call.
    fetcher, pipeline = LinkedinDownloader(), VspDataEnrichment()
    raw_profile = await fetcher.fetch_linkedin_data(linkedin_url=profile_linkedin)
    return await pipeline.process_linkedin_profile(profile=raw_profile)
|
| 17 |
+
|
| 18 |
+
def _year_or(date, fallback):
    """Return ``date.year`` when the date and its year are both set, else ``fallback``."""
    return date.year if (date and date.year) else fallback


def _format_education(idx, edu):
    """Render one classified education entry as a Markdown section."""
    year = _year_or(edu.education.end, "N/A")
    return (
        f"### Education {idx}\n"
        f"**School:** {edu.education.school_name}\n\n"
        f"**Degree:** {edu.education.degree}\n\n"
        f"**Year:** {year}\n\n"
        f"**Classification:** {edu.classification.output.value}\n"
    )


def _format_experience_summary(enriched_profile):
    """Render the total full-time experience and its per-secondary-type breakdown."""
    total_years = enriched_profile.full_time_work_experience_years
    by_secondary = enriched_profile.full_time_work_experience_by_secondary

    parts = [f"### Total Full-Time Work Experience: {total_years} years\n\n"]
    if by_secondary:
        parts.append("### Work Experience by Secondary Job Type:\n")
        parts.extend(
            f"- {job_type.value}: {years} years\n"
            for job_type, years in by_secondary.items()
        )
    return "".join(parts)


def _format_work_experience(idx, exp):
    """Render one classified work experience entry as a Markdown section."""
    # A missing start year shows as "N/A"; a missing end year is shown as an
    # ongoing position ("Present").
    start_year = _year_or(exp.position.start, "N/A")
    end_year = _year_or(exp.position.end, "Present")
    job_types = exp.work_experience_classification

    parts = [
        f"### Work Experience {idx}\n"
        f"**Company:** {exp.position.company_name}\n\n"
        f"**Time Range:** {start_year} - {end_year}\n\n"
        f"**Title:** {exp.position.title}\n\n"
        f"**Primary Job Type:** {job_types.primary_job_type.value}\n\n"
        f"**Secondary Job Type:** {job_types.secondary_job_type.value}\n\n"
    ]

    # Investing focus: the sector line is only emitted alongside an asset
    # class, falling back to "N/A" when no sector classification exists.
    if exp.investing_focus_asset_class_classification:
        asset_class = exp.investing_focus_asset_class_classification.investing_focus_asset_class.value
        sector = (
            exp.investing_focus_sector_classification.investing_focus_sector.value
            if exp.investing_focus_sector_classification
            else "N/A"
        )
        parts.append(f"**Investing Focus (Asset Class):** {asset_class}\n\n")
        parts.append(f"**Investing Focus (Sector):** {sector}\n\n")

    # Investment banking classification
    if exp.investment_banking_classification:
        ib_group = exp.investment_banking_classification.investment_banking_group.value
        parts.append(f"**Investment Banking Group:** {ib_group}\n")

    return "".join(parts)


async def analyze_profile(profile_linkedin):
    """Fetch, enrich, and render a LinkedIn profile as a Markdown report.

    Args:
        profile_linkedin: Profile URL handed to ``process_profile``.

    Returns:
        A Markdown string with a "Classified Educations" section followed by a
        "Classified Work Experiences" section (experience totals, then one
        subsection per classified position).
    """
    enriched_profile = await process_profile(profile_linkedin)

    education_output = "\n\n".join(
        _format_education(idx, edu)
        for idx, edu in enumerate(enriched_profile.classified_educations, 1)
    )
    experience_output = _format_experience_summary(enriched_profile)
    work_experience_output = "\n\n".join(
        _format_work_experience(idx, exp)
        for idx, exp in enumerate(enriched_profile.classified_work_experiences, 1)
    )

    return (
        f"# Classified Educations\n\n{education_output}\n\n"
        f"# Classified Work Experiences\n\n{experience_output}\n\n{work_experience_output}"
    )
|
| 87 |
+
|
| 88 |
+
def main():
    """Build the Gradio interface for the profile analyzer and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown("# LinkedIn Profile Analyzer")
        gr.Markdown("Enter a LinkedIn profile URL to analyze educational and work experiences.")

        profile_linkedin = gr.Textbox(label="LinkedIn Profile URL")
        analyze_button = gr.Button("Analyze")
        output = gr.Markdown()

        async def on_analyze_click(profile_linkedin):
            """Validate the textbox value and return the Markdown to display."""
            # Strip first so a whitespace-only entry is rejected here instead
            # of being sent to the downloader as a URL.
            url = (profile_linkedin or "").strip()
            if not url:
                return "Please enter a valid LinkedIn Profile URL."
            try:
                return await analyze_profile(url)
            except Exception as e:
                # UI boundary: show the failure in the output pane rather than
                # letting the event handler raise.
                return f"An error occurred: {str(e)}"

        analyze_button.click(fn=on_analyze_click, inputs=profile_linkedin, outputs=output)

    demo.launch()


if __name__ == "__main__":
    main()
|
src/vsp/app/bindings.py
CHANGED
|
@@ -5,7 +5,10 @@ from vsp.llm.llm_cache import LLMCache
|
|
| 5 |
from vsp.llm.openai.openai import AsyncOpenAIService
|
| 6 |
from vsp.llm.openai.openai_model import OpenAIModel
|
| 7 |
|
|
|
|
|
|
|
| 8 |
prompt_loader = PromptLoader()
|
| 9 |
llm_cache = LLMCache()
|
| 10 |
-
open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.
|
|
|
|
| 11 |
linkedin_downloader = LinkedinDownloader()
|
|
|
|
| 5 |
from vsp.llm.openai.openai import AsyncOpenAIService
|
| 6 |
from vsp.llm.openai.openai_model import OpenAIModel
|
| 7 |
|
| 8 |
+
|
| 9 |
+
|
| 10 |
prompt_loader = PromptLoader()
|
| 11 |
llm_cache = LLMCache()
|
| 12 |
+
open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_O), cache=llm_cache)
|
| 13 |
+
|
| 14 |
linkedin_downloader = LinkedinDownloader()
|
src/vsp/app/classifiers/education_classifier.py
CHANGED
|
@@ -44,12 +44,10 @@ class EducationClassification(BaseModel):
|
|
| 44 |
|
| 45 |
Attributes:
|
| 46 |
output (SchoolType): The classified school type.
|
| 47 |
-
confidence (float): Confidence level of the classification, between 0.0 and 1.0.
|
| 48 |
reasoning (str): Explanation for the classification decision.
|
| 49 |
"""
|
| 50 |
|
| 51 |
output: SchoolType = Field(description="The classified school type")
|
| 52 |
-
confidence: float = Field(description="Confidence level between 0.0 and 1.0")
|
| 53 |
reasoning: str = Field(description="Explanation for the classification")
|
| 54 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 55 |
|
|
@@ -88,7 +86,6 @@ class EducationClassifier:
|
|
| 88 |
case school_type if school_type in _SCHOOL_TYPE_MAPPING:
|
| 89 |
return EducationClassification(
|
| 90 |
output=_SCHOOL_TYPE_MAPPING[school_type],
|
| 91 |
-
confidence=float(parsed["confidence"]),
|
| 92 |
reasoning=parsed["reasoning"],
|
| 93 |
)
|
| 94 |
case _:
|
|
|
|
| 44 |
|
| 45 |
Attributes:
|
| 46 |
output (SchoolType): The classified school type.
|
|
|
|
| 47 |
reasoning (str): Explanation for the classification decision.
|
| 48 |
"""
|
| 49 |
|
| 50 |
output: SchoolType = Field(description="The classified school type")
|
|
|
|
| 51 |
reasoning: str = Field(description="Explanation for the classification")
|
| 52 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 53 |
|
|
|
|
| 86 |
case school_type if school_type in _SCHOOL_TYPE_MAPPING:
|
| 87 |
return EducationClassification(
|
| 88 |
output=_SCHOOL_TYPE_MAPPING[school_type],
|
|
|
|
| 89 |
reasoning=parsed["reasoning"],
|
| 90 |
)
|
| 91 |
case _:
|
src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py
CHANGED
|
@@ -32,7 +32,7 @@ class PrimaryJobType(str, Enum):
|
|
| 32 |
"""Enumeration of different primary job types for work experience classification."""
|
| 33 |
|
| 34 |
FULL_TIME = "Full-time"
|
| 35 |
-
ADVISORY_BOARD_INVESTOR = "Advisory / Board /
|
| 36 |
INTERNSHIP = "Internship"
|
| 37 |
EXTRACURRICULAR = "Extracurricular"
|
| 38 |
EDUCATION = "Education"
|
|
@@ -65,13 +65,11 @@ class WorkExperienceClassification(BaseModel):
|
|
| 65 |
Attributes:
|
| 66 |
primary_job_type (PrimaryJobType): The classified primary job type.
|
| 67 |
secondary_job_type (SecondaryJobType): The classified secondary job type.
|
| 68 |
-
confidence (float): Confidence level of the classification, between 0.0 and 1.0.
|
| 69 |
reasoning (str): Explanation for the classification decision.
|
| 70 |
"""
|
| 71 |
|
| 72 |
primary_job_type: PrimaryJobType = Field(description="The classified primary job type")
|
| 73 |
secondary_job_type: SecondaryJobType = Field(description="The classified secondary job type")
|
| 74 |
-
confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
|
| 75 |
reasoning: str = Field(description="Explanation for the classification")
|
| 76 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 77 |
|
|
@@ -117,7 +115,7 @@ class WorkExperienceClassifier:
|
|
| 117 |
WorkExperienceClassification: A structured representation of the classification result.
|
| 118 |
|
| 119 |
Raises:
|
| 120 |
-
ValueError: If the output contains an unknown job type
|
| 121 |
"""
|
| 122 |
lines = output.strip().split("\n")
|
| 123 |
parsed: dict[str, Any] = {}
|
|
@@ -136,15 +134,10 @@ class WorkExperienceClassifier:
|
|
| 136 |
except KeyError as e:
|
| 137 |
raise ValueError(f"Unknown job type: {str(e)}")
|
| 138 |
|
| 139 |
-
try:
|
| 140 |
-
confidence = float(parsed["confidence"])
|
| 141 |
-
except ValueError:
|
| 142 |
-
raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
|
| 143 |
|
| 144 |
return WorkExperienceClassification(
|
| 145 |
primary_job_type=PrimaryJobType(primary_job_type),
|
| 146 |
secondary_job_type=SecondaryJobType(secondary_job_type),
|
| 147 |
-
confidence=confidence,
|
| 148 |
reasoning=parsed["reasoning"],
|
| 149 |
)
|
| 150 |
|
|
|
|
| 32 |
"""Enumeration of different primary job types for work experience classification."""
|
| 33 |
|
| 34 |
FULL_TIME = "Full-time"
|
| 35 |
+
ADVISORY_BOARD_INVESTOR = "Advisory / Board / Investor"
|
| 36 |
INTERNSHIP = "Internship"
|
| 37 |
EXTRACURRICULAR = "Extracurricular"
|
| 38 |
EDUCATION = "Education"
|
|
|
|
| 65 |
Attributes:
|
| 66 |
primary_job_type (PrimaryJobType): The classified primary job type.
|
| 67 |
secondary_job_type (SecondaryJobType): The classified secondary job type.
|
|
|
|
| 68 |
reasoning (str): Explanation for the classification decision.
|
| 69 |
"""
|
| 70 |
|
| 71 |
primary_job_type: PrimaryJobType = Field(description="The classified primary job type")
|
| 72 |
secondary_job_type: SecondaryJobType = Field(description="The classified secondary job type")
|
|
|
|
| 73 |
reasoning: str = Field(description="Explanation for the classification")
|
| 74 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 75 |
|
|
|
|
| 115 |
WorkExperienceClassification: A structured representation of the classification result.
|
| 116 |
|
| 117 |
Raises:
|
| 118 |
+
ValueError: If the output contains an unknown job type.
|
| 119 |
"""
|
| 120 |
lines = output.strip().split("\n")
|
| 121 |
parsed: dict[str, Any] = {}
|
|
|
|
| 134 |
except KeyError as e:
|
| 135 |
raise ValueError(f"Unknown job type: {str(e)}")
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
return WorkExperienceClassification(
|
| 139 |
primary_job_type=PrimaryJobType(primary_job_type),
|
| 140 |
secondary_job_type=SecondaryJobType(secondary_job_type),
|
|
|
|
| 141 |
reasoning=parsed["reasoning"],
|
| 142 |
)
|
| 143 |
|
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py
CHANGED
|
@@ -42,7 +42,7 @@ class InvestingFocusAssetClass(str, Enum):
|
|
| 42 |
CREDIT = "Credit"
|
| 43 |
SECONDARIES = "Secondaries"
|
| 44 |
OTHER = "Other"
|
| 45 |
-
|
| 46 |
|
| 47 |
|
| 48 |
_INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
|
|
@@ -56,13 +56,11 @@ class InvestingFocusAssetClassClassification(BaseModel):
|
|
| 56 |
|
| 57 |
Attributes:
|
| 58 |
investing_focus_asset_class (InvestingFocusAssetClass): The classified investing focus or asset class.
|
| 59 |
-
confidence (float): Confidence level of the classification, between 0.0 and 1.0.
|
| 60 |
reasoning (str): Explanation for the classification decision.
|
| 61 |
other_description (str | None): Description for 'Other' classification, if applicable.
|
| 62 |
"""
|
| 63 |
|
| 64 |
investing_focus_asset_class: InvestingFocusAssetClass = Field(description="The investing focus or asset class")
|
| 65 |
-
confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
|
| 66 |
reasoning: str = Field(description="Explanation for the classification")
|
| 67 |
other_description: str | None = Field(default=None, description="Description for 'Other' classification")
|
| 68 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
|
@@ -111,7 +109,7 @@ class InvestingFocusAssetClassClassifier:
|
|
| 111 |
InvestingFocusAssetClassClassification: A structured representation of the classification result.
|
| 112 |
|
| 113 |
Raises:
|
| 114 |
-
ValueError: If the output contains an unknown investing focus or asset class
|
| 115 |
"""
|
| 116 |
lines = output.strip().split("\n")
|
| 117 |
parsed: dict[str, Any] = {}
|
|
@@ -126,14 +124,10 @@ class InvestingFocusAssetClassClassifier:
|
|
| 126 |
except KeyError as e:
|
| 127 |
raise ValueError(f"Unknown investing focus or asset class: {str(e)}")
|
| 128 |
|
| 129 |
-
|
| 130 |
-
confidence = float(parsed["confidence"])
|
| 131 |
-
except ValueError:
|
| 132 |
-
raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
|
| 133 |
|
| 134 |
classification = InvestingFocusAssetClassClassification(
|
| 135 |
investing_focus_asset_class=investing_focus_asset_class,
|
| 136 |
-
confidence=confidence,
|
| 137 |
reasoning=parsed["reasoning"],
|
| 138 |
)
|
| 139 |
|
|
@@ -141,7 +135,6 @@ class InvestingFocusAssetClassClassifier:
|
|
| 141 |
other_description = parsed.get("other_description")
|
| 142 |
classification = InvestingFocusAssetClassClassification(
|
| 143 |
investing_focus_asset_class=investing_focus_asset_class,
|
| 144 |
-
confidence=confidence,
|
| 145 |
reasoning=parsed["reasoning"],
|
| 146 |
other_description=other_description,
|
| 147 |
)
|
|
@@ -174,4 +167,5 @@ class InvestingFocusAssetClassClassifier:
|
|
| 174 |
resume=format_profile_as_resume(linkedin_profile),
|
| 175 |
work_experience=format_position(work_experience),
|
| 176 |
)
|
| 177 |
-
|
|
|
|
|
|
| 42 |
CREDIT = "Credit"
|
| 43 |
SECONDARIES = "Secondaries"
|
| 44 |
OTHER = "Other"
|
| 45 |
+
INSUFFICIENT_INFORMATION = "Insufficient Information"
|
| 46 |
|
| 47 |
|
| 48 |
_INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
|
|
|
|
| 56 |
|
| 57 |
Attributes:
|
| 58 |
investing_focus_asset_class (InvestingFocusAssetClass): The classified investing focus or asset class.
|
|
|
|
| 59 |
reasoning (str): Explanation for the classification decision.
|
| 60 |
other_description (str | None): Description for 'Other' classification, if applicable.
|
| 61 |
"""
|
| 62 |
|
| 63 |
investing_focus_asset_class: InvestingFocusAssetClass = Field(description="The investing focus or asset class")
|
|
|
|
| 64 |
reasoning: str = Field(description="Explanation for the classification")
|
| 65 |
other_description: str | None = Field(default=None, description="Description for 'Other' classification")
|
| 66 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
|
|
|
| 109 |
InvestingFocusAssetClassClassification: A structured representation of the classification result.
|
| 110 |
|
| 111 |
Raises:
|
| 112 |
+
ValueError: If the output contains an unknown investing focus or asset class.
|
| 113 |
"""
|
| 114 |
lines = output.strip().split("\n")
|
| 115 |
parsed: dict[str, Any] = {}
|
|
|
|
| 124 |
except KeyError as e:
|
| 125 |
raise ValueError(f"Unknown investing focus or asset class: {str(e)}")
|
| 126 |
|
| 127 |
+
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
classification = InvestingFocusAssetClassClassification(
|
| 130 |
investing_focus_asset_class=investing_focus_asset_class,
|
|
|
|
| 131 |
reasoning=parsed["reasoning"],
|
| 132 |
)
|
| 133 |
|
|
|
|
| 135 |
other_description = parsed.get("other_description")
|
| 136 |
classification = InvestingFocusAssetClassClassification(
|
| 137 |
investing_focus_asset_class=investing_focus_asset_class,
|
|
|
|
| 138 |
reasoning=parsed["reasoning"],
|
| 139 |
other_description=other_description,
|
| 140 |
)
|
|
|
|
| 167 |
resume=format_profile_as_resume(linkedin_profile),
|
| 168 |
work_experience=format_position(work_experience),
|
| 169 |
)
|
| 170 |
+
result = await prompt.evaluate() # type: ignore
|
| 171 |
+
return result
|
src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py
CHANGED
|
@@ -42,7 +42,7 @@ class InvestingFocusSector(str, Enum):
|
|
| 42 |
FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
|
| 43 |
INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"
|
| 44 |
OTHER = "Other"
|
| 45 |
-
|
| 46 |
|
| 47 |
|
| 48 |
_INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = {
|
|
@@ -56,12 +56,10 @@ class InvestingFocusSectorClassification(BaseModel):
|
|
| 56 |
|
| 57 |
Attributes:
|
| 58 |
investing_focus_sector (InvestingFocusSector): The classified investing focus sector.
|
| 59 |
-
confidence (float): Confidence level of the classification, between 0.0 and 1.0.
|
| 60 |
reasoning (str): Explanation for the classification decision.
|
| 61 |
"""
|
| 62 |
|
| 63 |
investing_focus_sector: InvestingFocusSector = Field(description="The investing focus sector")
|
| 64 |
-
confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
|
| 65 |
reasoning: str = Field(description="Explanation for the classification")
|
| 66 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 67 |
|
|
@@ -109,8 +107,6 @@ class InvestingFocusSectorClassifier:
|
|
| 109 |
Returns:
|
| 110 |
InvestingFocusSectorClassification: A structured representation of the classification result.
|
| 111 |
|
| 112 |
-
Raises:
|
| 113 |
-
ValueError: If the output contains an unknown investing focus sector or invalid confidence value.
|
| 114 |
"""
|
| 115 |
lines = output.strip().split("\n")
|
| 116 |
parsed: dict[str, Any] = {}
|
|
@@ -125,14 +121,8 @@ class InvestingFocusSectorClassifier:
|
|
| 125 |
except KeyError as e:
|
| 126 |
raise ValueError(f"Unknown investing focus sector: {str(e)}")
|
| 127 |
|
| 128 |
-
try:
|
| 129 |
-
confidence = float(parsed["confidence"])
|
| 130 |
-
except ValueError:
|
| 131 |
-
raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
|
| 132 |
-
|
| 133 |
classification = InvestingFocusSectorClassification(
|
| 134 |
investing_focus_sector=investing_focus_sector,
|
| 135 |
-
confidence=confidence,
|
| 136 |
reasoning=parsed["reasoning"],
|
| 137 |
)
|
| 138 |
return classification
|
|
|
|
| 42 |
FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
|
| 43 |
INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"
|
| 44 |
OTHER = "Other"
|
| 45 |
+
INSUFFICIENT_INFORMATION = "Insufficent Information"
|
| 46 |
|
| 47 |
|
| 48 |
_INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = {
|
|
|
|
| 56 |
|
| 57 |
Attributes:
|
| 58 |
investing_focus_sector (InvestingFocusSector): The classified investing focus sector.
|
|
|
|
| 59 |
reasoning (str): Explanation for the classification decision.
|
| 60 |
"""
|
| 61 |
|
| 62 |
investing_focus_sector: InvestingFocusSector = Field(description="The investing focus sector")
|
|
|
|
| 63 |
reasoning: str = Field(description="Explanation for the classification")
|
| 64 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 65 |
|
|
|
|
| 107 |
Returns:
|
| 108 |
InvestingFocusSectorClassification: A structured representation of the classification result.
|
| 109 |
|
|
|
|
|
|
|
| 110 |
"""
|
| 111 |
lines = output.strip().split("\n")
|
| 112 |
parsed: dict[str, Any] = {}
|
|
|
|
| 121 |
except KeyError as e:
|
| 122 |
raise ValueError(f"Unknown investing focus sector: {str(e)}")
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
classification = InvestingFocusSectorClassification(
|
| 125 |
investing_focus_sector=investing_focus_sector,
|
|
|
|
| 126 |
reasoning=parsed["reasoning"],
|
| 127 |
)
|
| 128 |
return classification
|
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py
CHANGED
|
@@ -50,7 +50,7 @@ class InvestmentBankingGroup(str, Enum):
|
|
| 50 |
FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
|
| 51 |
INFRASTRUCTURE = "Infrastructure / Transportation"
|
| 52 |
OTHER = "Other"
|
| 53 |
-
|
| 54 |
|
| 55 |
|
| 56 |
_INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
|
|
@@ -64,12 +64,10 @@ class InvestmentBankingGroupClassification(BaseModel):
|
|
| 64 |
|
| 65 |
Attributes:
|
| 66 |
investment_banking_group (InvestmentBankingGroup): The classified investment banking group.
|
| 67 |
-
confidence (float): Confidence level of the classification, between 0.0 and 1.0.
|
| 68 |
reasoning (str): Explanation for the classification decision.
|
| 69 |
"""
|
| 70 |
|
| 71 |
investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
|
| 72 |
-
confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
|
| 73 |
reasoning: str = Field(description="Explanation for the classification")
|
| 74 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 75 |
|
|
@@ -116,8 +114,6 @@ class InvestmentBankingGroupClassifier:
|
|
| 116 |
Returns:
|
| 117 |
InvestmentBankingGroupClassification: A structured representation of the classification result.
|
| 118 |
|
| 119 |
-
Raises:
|
| 120 |
-
ValueError: If the output contains an unknown investment banking group or invalid confidence value.
|
| 121 |
"""
|
| 122 |
lines = output.strip().split("\n")
|
| 123 |
parsed: dict[str, Any] = {}
|
|
@@ -132,14 +128,9 @@ class InvestmentBankingGroupClassifier:
|
|
| 132 |
except KeyError as e:
|
| 133 |
raise ValueError(f"Unknown investment banking group: {str(e)}")
|
| 134 |
|
| 135 |
-
try:
|
| 136 |
-
confidence = float(parsed["confidence"])
|
| 137 |
-
except ValueError:
|
| 138 |
-
raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
|
| 139 |
|
| 140 |
return InvestmentBankingGroupClassification(
|
| 141 |
investment_banking_group=investment_banking_group,
|
| 142 |
-
confidence=confidence,
|
| 143 |
reasoning=parsed["reasoning"],
|
| 144 |
)
|
| 145 |
|
|
|
|
| 50 |
FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
|
| 51 |
INFRASTRUCTURE = "Infrastructure / Transportation"
|
| 52 |
OTHER = "Other"
|
| 53 |
+
INSUFFICIENT_INFORMATION = "Insufficient Information"
|
| 54 |
|
| 55 |
|
| 56 |
_INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
|
|
|
|
| 64 |
|
| 65 |
Attributes:
|
| 66 |
investment_banking_group (InvestmentBankingGroup): The classified investment banking group.
|
|
|
|
| 67 |
reasoning (str): Explanation for the classification decision.
|
| 68 |
"""
|
| 69 |
|
| 70 |
investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
|
|
|
|
| 71 |
reasoning: str = Field(description="Explanation for the classification")
|
| 72 |
model_config = {"frozen": True} # This makes the model immutable and hashable
|
| 73 |
|
|
|
|
| 114 |
Returns:
|
| 115 |
InvestmentBankingGroupClassification: A structured representation of the classification result.
|
| 116 |
|
|
|
|
|
|
|
| 117 |
"""
|
| 118 |
lines = output.strip().split("\n")
|
| 119 |
parsed: dict[str, Any] = {}
|
|
|
|
| 128 |
except KeyError as e:
|
| 129 |
raise ValueError(f"Unknown investment banking group: {str(e)}")
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
return InvestmentBankingGroupClassification(
|
| 133 |
investment_banking_group=investment_banking_group,
|
|
|
|
| 134 |
reasoning=parsed["reasoning"],
|
| 135 |
)
|
| 136 |
|
src/vsp/app/main.py
CHANGED
|
@@ -17,12 +17,18 @@ Usage:
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
import asyncio
|
| 20 |
-
from typing import Sequence
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
from pydantic import BaseModel, Field
|
| 23 |
|
| 24 |
from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
|
| 25 |
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
|
|
|
|
|
|
|
| 26 |
WorkExperienceClassification,
|
| 27 |
WorkExperienceClassifier,
|
| 28 |
)
|
|
@@ -41,6 +47,8 @@ from vsp.app.classifiers.work_experience.investment_banking_group_classifier imp
|
|
| 41 |
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
| 44 |
class ClassifiedEducation(BaseModel):
|
| 45 |
"""
|
| 46 |
Represents a classified education item from a LinkedIn profile.
|
|
@@ -84,6 +92,8 @@ class LinkedinProfileClassificationResults(BaseModel):
|
|
| 84 |
|
| 85 |
classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
|
| 86 |
classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
class VspDataEnrichment:
|
|
@@ -109,6 +119,83 @@ class VspDataEnrichment:
|
|
| 109 |
self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
|
| 110 |
self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
|
| 113 |
"""
|
| 114 |
Process a LinkedIn profile and classify its education and work experiences.
|
|
@@ -155,15 +242,18 @@ class VspDataEnrichment:
|
|
| 155 |
work_classification.primary_job_type.INTERNSHIP,
|
| 156 |
work_classification.primary_job_type.EXTRACURRICULAR,
|
| 157 |
}:
|
|
|
|
| 158 |
if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
|
| 159 |
ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
|
| 160 |
profile, position
|
| 161 |
)
|
| 162 |
classified_work_experience.investment_banking_classification = ib_classification
|
| 163 |
|
| 164 |
-
if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
|
| 165 |
-
asset_class_task =
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
)
|
| 168 |
sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
|
| 169 |
profile, position
|
|
@@ -176,8 +266,11 @@ class VspDataEnrichment:
|
|
| 176 |
|
| 177 |
classified_work_experiences.append(classified_work_experience)
|
| 178 |
|
|
|
|
|
|
|
|
|
|
| 179 |
return LinkedinProfileClassificationResults(
|
| 180 |
-
classified_educations=classified_educations, classified_work_experiences=classified_work_experiences
|
| 181 |
)
|
| 182 |
|
| 183 |
|
|
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
import asyncio
|
| 20 |
+
from typing import Sequence, Mapping, List
|
| 21 |
+
|
| 22 |
+
import calendar
|
| 23 |
+
from datetime import date
|
| 24 |
+
|
| 25 |
|
| 26 |
from pydantic import BaseModel, Field
|
| 27 |
|
| 28 |
from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
|
| 29 |
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
|
| 30 |
+
PrimaryJobType,
|
| 31 |
+
SecondaryJobType,
|
| 32 |
WorkExperienceClassification,
|
| 33 |
WorkExperienceClassifier,
|
| 34 |
)
|
|
|
|
| 47 |
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
|
| 48 |
|
| 49 |
|
| 50 |
+
from collections import defaultdict
|
| 51 |
+
|
| 52 |
class ClassifiedEducation(BaseModel):
|
| 53 |
"""
|
| 54 |
Represents a classified education item from a LinkedIn profile.
|
|
|
|
| 92 |
|
| 93 |
classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
|
| 94 |
classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
|
| 95 |
+
full_time_work_experience_years: float = Field(default=0.0)
|
| 96 |
+
full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)
|
| 97 |
|
| 98 |
|
| 99 |
class VspDataEnrichment:
|
|
|
|
| 119 |
self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
|
| 120 |
self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
|
| 121 |
|
| 122 |
+
|
| 123 |
+
def estimate_full_time_experience_by_secondary_job_type(self, classified_work_experiences: List[ClassifiedWorkExperience], current_date: date | None = None) -> Mapping[SecondaryJobType, float]:
    """
    Estimate years of full-time experience per secondary job type.

    Only work experiences whose primary job type is FULL_TIME and that have a
    secondary job type are counted. Each such position becomes a
    [start_date, end_date] interval; a sweep over the sorted interval
    boundaries then credits every elapsed span to each secondary job type
    that has at least one position open during it. Overlapping positions of
    the *same* secondary type are therefore counted once, while overlapping
    positions of *different* types are each credited in full.

    Args:
        classified_work_experiences: The classified positions to aggregate.
        current_date: The date used as the end of positions that have no end
            (``position.end is None``). Defaults to today's date.

    Returns:
        A mapping from secondary job type to experience in years
        (days / 365.25, rounded to 2 decimals). Types with no accrued days
        are omitted.
    """
    if current_date is None:
        current_date = date.today()

    # Interval boundaries for the sweep: (date, +1 for start / -1 for end, type).
    events = []

    for cwe in classified_work_experiences:
        work_classification = cwe.work_experience_classification
        secondary_job_type = work_classification.secondary_job_type
        if work_classification.primary_job_type != PrimaryJobType.FULL_TIME or not secondary_job_type:
            continue

        start = cwe.position.start
        if not start or not start.year:
            continue  # Cannot place the position on a timeline without a start year.

        end = cwe.position.end
        # NOTE(review): an end object with a falsy year is skipped rather than
        # treated as "ongoing" - confirm this matches the upstream data format.
        if end is not None and not end.year:
            continue

        try:
            # Missing start month/day default to the earliest possible date.
            start_date = date(start.year, start.month or 1, start.day or 1)
            if end is None:
                end_date = current_date
            else:
                # Missing end month/day default to the latest possible date.
                end_month = end.month or 12
                end_day = end.day or calendar.monthrange(end.year, end_month)[1]
                end_date = date(end.year, end_month, end_day)
        except ValueError:
            # Malformed calendar values (e.g. day 31 in a 30-day month):
            # skip this position instead of failing the whole profile.
            continue

        if start_date > end_date:
            continue  # Skip invalid intervals.

        events.append((start_date, 1, secondary_job_type))
        events.append((end_date, -1, secondary_job_type))

    events.sort(key=lambda event: event[0])

    # Number of currently open positions per type. A counter (rather than a
    # set) keeps a type active until *all* of its overlapping positions end.
    active_counts = defaultdict(int)
    durations = defaultdict(int)  # in days
    last_date = None

    for event_date, delta, secondary_job_type in events:
        if last_date is not None and event_date > last_date:
            elapsed_days = (event_date - last_date).days
            for active_type, open_positions in active_counts.items():
                if open_positions > 0:
                    durations[active_type] += elapsed_days

        active_counts[secondary_job_type] += delta
        last_date = event_date

    # Convert durations from days to years.
    return {job_type: round(days / 365.25, 2) for job_type, days in durations.items()}
|
| 197 |
+
|
| 198 |
+
|
| 199 |
async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
|
| 200 |
"""
|
| 201 |
Process a LinkedIn profile and classify its education and work experiences.
|
|
|
|
| 242 |
work_classification.primary_job_type.INTERNSHIP,
|
| 243 |
work_classification.primary_job_type.EXTRACURRICULAR,
|
| 244 |
}:
|
| 245 |
+
|
| 246 |
if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
|
| 247 |
ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
|
| 248 |
profile, position
|
| 249 |
)
|
| 250 |
classified_work_experience.investment_banking_classification = ib_classification
|
| 251 |
|
| 252 |
+
if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING and not work_classification.primary_job_type == work_classification.primary_job_type.ADVISORY_BOARD_INVESTOR:
|
| 253 |
+
asset_class_task = (
|
| 254 |
+
self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
|
| 255 |
+
profile, position
|
| 256 |
+
)
|
| 257 |
)
|
| 258 |
sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
|
| 259 |
profile, position
|
|
|
|
| 266 |
|
| 267 |
classified_work_experiences.append(classified_work_experience)
|
| 268 |
|
| 269 |
+
|
| 270 |
+
experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
|
| 271 |
+
total_work_experience = sum(experience_by_job_type.values())
|
| 272 |
return LinkedinProfileClassificationResults(
|
| 273 |
+
classified_educations=classified_educations, classified_work_experiences=classified_work_experiences, full_time_work_experience_years=total_work_experience, full_time_work_experience_by_secondary=experience_by_job_type
|
| 274 |
)
|
| 275 |
|
| 276 |
|
src/vsp/app/model/linkedin/linkedin_models.py
CHANGED
|
@@ -79,6 +79,7 @@ class Position(StartEndMixin):
|
|
| 79 |
company_username: str | None = None
|
| 80 |
company_url: str | None = None
|
| 81 |
company_industry: str | None = None
|
|
|
|
| 82 |
company_staff_count_range: str | None = None
|
| 83 |
title: str | None = None
|
| 84 |
location: str | None = None
|
|
@@ -90,7 +91,6 @@ class Position(StartEndMixin):
|
|
| 90 |
class Skill(BaseSchema):
|
| 91 |
name: str | None = None
|
| 92 |
|
| 93 |
-
|
| 94 |
class Course(BaseSchema):
|
| 95 |
name: str | None = None
|
| 96 |
number: str | None = None
|
|
@@ -127,7 +127,7 @@ class LinkedinProfile(BaseSchema):
|
|
| 127 |
languages: List[Language] | None = []
|
| 128 |
educations: List[Education] = []
|
| 129 |
positions: List[Position] = Field(default=[], alias="position")
|
| 130 |
-
full_positions: List[Position] = Field(default=[]
|
| 131 |
skills: List[Skill] | None = []
|
| 132 |
courses: List[Course] | None = []
|
| 133 |
certifications: List[Certification] | None = []
|
|
@@ -141,8 +141,5 @@ class LinkedinProfile(BaseSchema):
|
|
| 141 |
:return: A Profile instance created from the given JSON data.
|
| 142 |
"""
|
| 143 |
profile = LinkedinProfile.model_validate(json)
|
| 144 |
-
|
| 145 |
-
profile.full_positions is not None and profile.positions is not None
|
| 146 |
-
): # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
|
| 147 |
-
profile.positions = profile.full_positions
|
| 148 |
return profile
|
|
|
|
| 79 |
company_username: str | None = None
|
| 80 |
company_url: str | None = None
|
| 81 |
company_industry: str | None = None
|
| 82 |
+
company_description: str | None = None
|
| 83 |
company_staff_count_range: str | None = None
|
| 84 |
title: str | None = None
|
| 85 |
location: str | None = None
|
|
|
|
| 91 |
class Skill(BaseSchema):
|
| 92 |
name: str | None = None
|
| 93 |
|
|
|
|
| 94 |
class Course(BaseSchema):
|
| 95 |
name: str | None = None
|
| 96 |
number: str | None = None
|
|
|
|
| 127 |
languages: List[Language] | None = []
|
| 128 |
educations: List[Education] = []
|
| 129 |
positions: List[Position] = Field(default=[], alias="position")
|
| 130 |
+
full_positions: List[Position] = Field(default=[])
|
| 131 |
skills: List[Skill] | None = []
|
| 132 |
courses: List[Course] | None = []
|
| 133 |
certifications: List[Certification] | None = []
|
|
|
|
| 141 |
:return: A Profile instance created from the given JSON data.
|
| 142 |
"""
|
| 143 |
profile = LinkedinProfile.model_validate(json)
|
| 144 |
+
profile.positions = profile.full_positions
|
|
|
|
|
|
|
|
|
|
| 145 |
return profile
|
src/vsp/app/prompts/education_classifier/1 - education_classifier_human.txt
CHANGED
|
@@ -6,4 +6,4 @@ Full Resume:
|
|
| 6 |
Specific Linkedin Education Item:
|
| 7 |
{education}
|
| 8 |
|
| 9 |
-
|
|
|
|
| 6 |
Specific Linkedin Education Item:
|
| 7 |
{education}
|
| 8 |
|
| 9 |
+
Ensure your reasoning refers to specific details from both the resume and the Linkedin education item that support your decision.
|
src/vsp/app/prompts/education_classifier/1 - education_classifier_system.txt
CHANGED
|
@@ -15,7 +15,6 @@ Pay close attention to the degree type, field of study, and any other relevant i
|
|
| 15 |
Provide your response in the following format:
|
| 16 |
|
| 17 |
output: [CATEGORY_NAME]
|
| 18 |
-
confidence: [0.0 to 1.0]
|
| 19 |
reasoning: [Your explanation here]
|
| 20 |
|
| 21 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
|
|
|
|
| 15 |
Provide your response in the following format:
|
| 16 |
|
| 17 |
output: [CATEGORY_NAME]
|
|
|
|
| 18 |
reasoning: [Your explanation here]
|
| 19 |
|
| 20 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly two lines.
|
src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt
CHANGED
|
@@ -6,4 +6,4 @@ Full Resume:
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
-
Provide your classification for both Primary Job Type and Secondary Job Type
|
|
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
+
Provide your classification for both Primary Job Type and Secondary Job Type and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
|
src/vsp/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt
CHANGED
|
@@ -4,7 +4,7 @@ Use the provided information carefully to make accurate classifications. Pay clo
|
|
| 4 |
|
| 5 |
Primary Job Type categories:
|
| 6 |
1. FULL_TIME: Regular, ongoing employment
|
| 7 |
-
2. ADVISORY_BOARD_INVESTOR:
|
| 8 |
3. INTERNSHIP: Short-term positions for students or recent graduates, including summer/seasonal analyst roles
|
| 9 |
4. EXTRACURRICULAR: Unpaid activities related to career development, such as student clubs or competitions
|
| 10 |
5. EDUCATION: When educational experiences are listed as work experiences
|
|
@@ -12,13 +12,15 @@ Primary Job Type categories:
|
|
| 12 |
|
| 13 |
Some tips for primary job type categories:
|
| 14 |
- Primary job categories are used to describe the hours and compensation setup of the work experience, as opposed to the industry of focus.
|
| 15 |
-
- It cannot be ENTREPRENEUR_FOUNDER: Founding or co-founding a company, as this is a secondary job type category.
|
| 16 |
- If the role is a 2-4 month job that happened in the summer, it's likely an INTERNSHIP. Something that's longer than 6 months is unlikely to be an internship unless it meets the other criteria below.
|
| 17 |
- If the role is a part-time job while the candidate was in school, it's likely an INTERNSHIP.
|
| 18 |
- If the role is before the first full-time job, it's probably during school and should be classified as INTERNSHIP.
|
| 19 |
- If the role explicitly says "intern", "internship", or "externship", 'summer analyst', 'summer associate', 'winter analyst', or 'winter associate' it's almost certainly an INTERNSHIP.
|
| 20 |
- If the role is a student club, a competition, or something related to the college of the job candidate, it's likely EXTRACURRICULAR.
|
| 21 |
- If the role's company is a fraternity, a sorority, or a business fraternity, it's likely EXTRACURRICULAR.
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
Secondary Job Type categories:
|
| 24 |
1. INVESTING: Professional investing roles, such as venture capital, private equity, or hedge fund positions (Note: Roles in private equity should be classified as INVESTING)
|
|
@@ -28,7 +30,7 @@ Secondary Job Type categories:
|
|
| 28 |
5. ENGINEERING: Software development, hardware engineering, or other technical roles
|
| 29 |
6. ENTREPRENEUR_FOUNDER: Founding or co-founding a company
|
| 30 |
7. CORPDEV_STRATEGY: Corporate development or strategic planning roles
|
| 31 |
-
8. OTHER: Any role that doesn't fit the above categories
|
| 32 |
|
| 33 |
Some tips for secondary job type categories:
|
| 34 |
- If the role involves analyzing potential investments, it's likely INVESTING.
|
|
@@ -38,7 +40,6 @@ Some tips for secondary job type categories:
|
|
| 38 |
Provide your response in the following format exactly:
|
| 39 |
|
| 40 |
reasoning: [Your explanation here]
|
| 41 |
-
confidence: [0.0 to 1.0]
|
| 42 |
primary_job_type: [one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER]
|
| 43 |
secondary_job_type: [one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER]
|
| 44 |
|
|
@@ -47,4 +48,4 @@ Ensure each part of your response is on a separate line, exactly as shown above.
|
|
| 47 |
The PRIMARY_JOB_TYPE must be one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER
|
| 48 |
The SECONDARY_JOB_TYPE must be one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER
|
| 49 |
|
| 50 |
-
|
|
|
|
| 4 |
|
| 5 |
Primary Job Type categories:
|
| 6 |
1. FULL_TIME: Regular, ongoing employment
|
| 7 |
+
2. ADVISORY_BOARD_INVESTOR: When advisory roles, board memberships, or investor status in a specific company are listed as work experience
|
| 8 |
3. INTERNSHIP: Short-term positions for students or recent graduates, including summer/seasonal analyst roles
|
| 9 |
4. EXTRACURRICULAR: Unpaid activities related to career development, such as student clubs or competitions
|
| 10 |
5. EDUCATION: When educational experiences are listed as work experiences
|
|
|
|
| 12 |
|
| 13 |
Some tips for primary job type categories:
|
| 14 |
- Primary job categories are used to describe the hours and compensation setup of the work experience, as opposed to the industry of focus.
|
|
|
|
| 15 |
- If the role is a 2-4 month job that happened in the summer, it's likely an INTERNSHIP. Something that's longer than 6 months is unlikely to be an internship unless it meets the other criteria below.
|
| 16 |
- If the role is a part-time job while the candidate was in school, it's likely an INTERNSHIP.
|
| 17 |
- If the role is before the first full-time job, it's probably during school and should be classified as INTERNSHIP.
|
| 18 |
- If the role explicitly says "intern", "internship", or "externship", 'summer analyst', 'summer associate', 'winter analyst', or 'winter associate' it's almost certainly an INTERNSHIP.
|
| 19 |
- If the role is a student club, a competition, or something related to the college of the job candidate, it's likely EXTRACURRICULAR.
|
| 20 |
- If the role's company is a fraternity, a sorority, or a business fraternity, it's likely EXTRACURRICULAR.
|
| 21 |
+
- If the job title is "Investor", but it overlaps with other full-time employment, and the company description sounds like an operating company rather than an investment firm, it's likely ADVISORY_BOARD_INVESTOR.
|
| 22 |
+
- e.g., Someone says they were an "Investor" at OpenAI in 2023 but their resume indicates they had full-time employment at Sequoia Capital during this time, then they are saying that they invested in OpenAI while they were at Sequoia, not that they were an investor on behalf of OpenAI, and thus the classification for the "Investor at OpenAI" role should be ADVISORY_BOARD_INVESTOR.
|
| 23 |
+
- In general, if the company's description sounds more like an operating company rather than an investment firm/fund and the title is "Investor", there's a high chance the correct classification is ADVISORY_BOARD_INVESTOR.
|
| 24 |
|
| 25 |
Secondary Job Type categories:
|
| 26 |
1. INVESTING: Professional investing roles, such as venture capital, private equity, or hedge fund positions (Note: Roles in private equity should be classified as INVESTING)
|
|
|
|
| 30 |
5. ENGINEERING: Software development, hardware engineering, or other technical roles
|
| 31 |
6. ENTREPRENEUR_FOUNDER: Founding or co-founding a company
|
| 32 |
7. CORPDEV_STRATEGY: Corporate development or strategic planning roles
|
| 33 |
+
8. OTHER: Any role that doesn't fit the above categories (e.g., corporate finance)
|
| 34 |
|
| 35 |
Some tips for secondary job type categories:
|
| 36 |
- If the role involves analyzing potential investments, it's likely INVESTING.
|
|
|
|
| 40 |
Provide your response in the following format exactly:
|
| 41 |
|
| 42 |
reasoning: [Your explanation here]
|
|
|
|
| 43 |
primary_job_type: [one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER]
|
| 44 |
secondary_job_type: [one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER]
|
| 45 |
|
|
|
|
| 48 |
The PRIMARY_JOB_TYPE must be one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER
|
| 49 |
The SECONDARY_JOB_TYPE must be one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER
|
| 50 |
|
| 51 |
+
In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt
CHANGED
|
@@ -6,7 +6,7 @@ Full Resume:
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
-
Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your
|
| 10 |
|
| 11 |
-
NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific
|
| 12 |
|
|
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
+
Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
|
| 10 |
|
| 11 |
+
NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, set your classification to INSUFFICIENT_INFORMATION.
|
| 12 |
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt
CHANGED
|
@@ -15,19 +15,22 @@ Investing Focus / Asset Class categories:
|
|
| 15 |
10. CREDIT: Credit investments
|
| 16 |
11. SECONDARIES: Secondary market investments
|
| 17 |
12. OTHER: Any focus or asset class that doesn't fit the above categories
|
| 18 |
-
13.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
Provide your response in the following format exactly:
|
| 21 |
|
| 22 |
investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
|
| 23 |
other_description: [Only if OTHER is selected, provide a brief description]
|
| 24 |
-
confidence: [0.0 to 1.0]
|
| 25 |
reasoning: [Your explanation here]
|
| 26 |
|
| 27 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be only three or four lines (depending on whether OTHER is selected).
|
| 28 |
|
| 29 |
-
The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER.
|
| 30 |
-
|
| 31 |
-
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 32 |
|
| 33 |
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
|
|
|
|
| 15 |
10. CREDIT: Credit investments
|
| 16 |
11. SECONDARIES: Secondary market investments
|
| 17 |
12. OTHER: Any focus or asset class that doesn't fit the above categories
|
| 18 |
+
13. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
Hints:
|
| 22 |
+
- If they worked for a firm that invests in multiple of these categories, use whichever context clues that you can to select which category applies best.
|
| 23 |
+
- For example if they work for a firm that does both venture capital and real estate investments and do not provide a description beyond "associate" for their current role, but their prior experience is all in real estate, you may assume this position is in real estate as well.
|
| 24 |
+
- If they work for a firm that does multistage VC, but they don't specify which stage they invested in, you can simply answer "MULTI_STAGE_VC" rather than "INSUFFICIENT_INFORMATION"
|
| 25 |
|
| 26 |
Provide your response in the following format exactly:
|
| 27 |
|
| 28 |
investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
|
| 29 |
other_description: [Only if OTHER is selected, provide a brief description]
|
|
|
|
| 30 |
reasoning: [Your explanation here]
|
| 31 |
|
| 32 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be only two or three lines (depending on whether OTHER is selected).
|
| 33 |
|
| 34 |
+
The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER, INSUFFICIENT_INFORMATION.
|
|
|
|
|
|
|
| 35 |
|
| 36 |
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt
CHANGED
|
@@ -6,6 +6,6 @@ Full Resume:
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
-
Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE
|
| 10 |
|
| 11 |
-
NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis,
|
|
|
|
| 6 |
Specific Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
+
Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision.
|
| 10 |
|
| 11 |
+
NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, ensure that your classification is INSUFFICIENT_INFORMATION.
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt
CHANGED
|
@@ -15,16 +15,20 @@ Investing Focus Sector categories:
|
|
| 15 |
10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
|
| 16 |
11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
|
| 17 |
12. OTHER: Any focus that doesn't fit the above categories
|
| 18 |
-
13.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
Provide your response in the following format exactly:
|
| 21 |
|
| 22 |
investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
|
| 23 |
-
confidence: [0.0 to 1.0]
|
| 24 |
reasoning: [Your explanation here]
|
| 25 |
|
| 26 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
|
| 27 |
|
| 28 |
-
The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER,
|
| 29 |
-
|
| 30 |
-
Your confidence level should reflect how certain you are about your classification based on the information provided. In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
|
|
|
|
| 15 |
10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
|
| 16 |
11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
|
| 17 |
12. OTHER: Any focus that doesn't fit the above categories
|
| 18 |
+
13. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification.
|
| 19 |
+
|
| 20 |
+
Hints:
|
| 21 |
+
- The firm's focus will often give you a strong hint about what sector they covered.
|
| 22 |
+
- For example, if the firm focuses on CONSUMER_RETAIL and not other sectors, you can safely assume this person focused on CONSUMER_RETAIL.
|
| 23 |
+
- However, if the firm is generalist, that doesn't necessarily mean this work experience is generalist; they may have covered some specific area for the firm.
|
| 24 |
+
- Example: A VC firm is generalist, but the work experience description shows this person was specifically on a fintech team, in which case your answer would be FINANCIAL_INSTITUTIONS
|
| 25 |
+
- Or they *could* have been on a specific team (not enough info to know), in which case your answer would be INSUFFICIENT_INFORMATION
|
| 26 |
|
| 27 |
Provide your response in the following format exactly:
|
| 28 |
|
| 29 |
investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
|
|
|
|
| 30 |
reasoning: [Your explanation here]
|
| 31 |
|
| 32 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly two lines.
|
| 33 |
|
| 34 |
+
The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER, INSUFFICIENT_INFORMATION.
|
|
|
|
|
|
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt
CHANGED
|
@@ -6,4 +6,4 @@ Full Resume:
|
|
| 6 |
Specific Investment Banking Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
-
Provide your best guess on the investment banking group,
|
|
|
|
| 6 |
Specific Investment Banking Work Experience Item:
|
| 7 |
{work_experience}
|
| 8 |
|
| 9 |
+
Provide your best guess on the investment banking group, and your reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
|
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt
CHANGED
|
@@ -23,15 +23,13 @@ Investment banking groups:
|
|
| 23 |
18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions and banking transactions.
|
| 24 |
19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
|
| 25 |
20. OTHER: Any group that doesn't fit the above categories.
|
| 26 |
-
21.
|
| 27 |
|
| 28 |
Provide your response in the following format exactly:
|
| 29 |
|
| 30 |
investment_banking_group: [One of the twenty investment banking groups listed above]
|
| 31 |
-
confidence: [0.0 to 1.0]
|
| 32 |
reasoning: [Your explanation here]
|
| 33 |
|
| 34 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be only three lines.
|
| 35 |
-
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 36 |
|
| 37 |
In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
|
|
|
|
| 23 |
18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions and banking transactions.
|
| 24 |
19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
|
| 25 |
20. OTHER: Any group that doesn't fit the above categories.
|
| 26 |
+
21. INSUFFICIENT_INFORMATION: If the information provided is insufficient to make a classification.
|
| 27 |
|
| 28 |
Provide your response in the following format exactly:
|
| 29 |
|
| 30 |
investment_banking_group: [One of the twenty-one investment banking groups listed above]
|
|
|
|
| 31 |
reasoning: [Your explanation here]
|
| 32 |
|
| 33 |
Ensure each part of your response is on a separate line, exactly as shown above. There should be only two lines.
|
|
|
|
| 34 |
|
| 35 |
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
|
src/vsp/app/scrapers/linkedin_downloader.py
CHANGED
|
@@ -140,6 +140,43 @@ class LinkedinDownloader:
|
|
| 140 |
headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
|
| 141 |
return (headers, querystring)
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
@retry(
|
| 144 |
wait=wait_random_exponential(min=1, max=60),
|
| 145 |
stop=stop_after_attempt(3),
|
|
@@ -170,7 +207,30 @@ class LinkedinDownloader:
|
|
| 170 |
async with session.get(self._URL, headers=headers, params=querystring) as response:
|
| 171 |
if response.status == 200:
|
| 172 |
data = await response.json()
|
| 173 |
-
|
| 174 |
else:
|
| 175 |
logger.error("Failed to fetch Linkedin profile", url=linkedin_url, status=response.status)
|
| 176 |
raise LinkedinFetchFailedError(f"Failed to fetch Linkedin profile for {linkedin_url}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
|
| 141 |
return (headers, querystring)
|
| 142 |
|
| 143 |
+
|
| 144 |
+
def _compose_company_info_request(self, company_username: str) -> tuple[dict[str, str], dict[str, str]]:
    """Build the headers and query parameters for a company-details request.

    Args:
        company_username: Company identifier, sent as the ``username``
            query parameter.

    Returns:
        A ``(headers, querystring)`` tuple. The headers carry the RapidAPI
        key and host stored on this instance.
    """
    request_headers = {
        "X-RapidAPI-Key": self._api_key,
        "X-RapidAPI-Host": self._X_RAPIDAPI_HOST,
    }
    query_params = {"username": company_username}
    return (request_headers, query_params)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
async def augment_company_descriptions(self, session: aiohttp.ClientSession, profile: LinkedinProfile) -> None:
    """Attach a company description to each position on ``profile``.

    For every position that has a ``company_username``, the company-details
    endpoint is queried. On a 200 response the company's tagline and
    description (whichever are present) are joined with a blank line and
    stored as ``company_description`` on a copy of the position. Positions
    without a username, and positions whose lookup fails or returns an
    unexpected payload, are kept unchanged.

    ``profile.positions`` is replaced with the new list; nothing is returned.

    Args:
        session: Open aiohttp session used for the HTTP requests.
        profile: Profile whose ``positions`` list is rewritten.
    """
    # Several positions at the same firm share one lookup instead of
    # repeating the same request against the API.
    fetched: dict[str, str] = {}
    new_positions = []
    for position in profile.positions:
        company_username = position.company_username
        if not company_username:
            new_positions.append(position)
            continue

        if company_username not in fetched:
            headers, querystring = self._compose_company_info_request(company_username)
            # NOTE(review): the profile request calls `self._URL` as-is, while this
            # call appends an endpoint name to it -- confirm `_URL` is a base URL
            # that this concatenation turns into a valid endpoint.
            async with session.get(self._URL + "get-company-details", headers=headers, params=querystring) as response:
                if response.status == 200:
                    data = await response.json()
                    # A 200 response may still omit the `data` object; treat that
                    # as a failed lookup rather than raising and aborting the
                    # whole profile fetch.
                    details = data.get("data") if isinstance(data, dict) else None
                    if isinstance(details, dict):
                        parts = (details.get("tagline"), details.get("description"))
                        fetched[company_username] = "\n\n".join(part for part in parts if part)
                    else:
                        logger.error(
                            "Linkedin company profile response has no data",
                            company_username=company_username,
                        )
                else:
                    logger.error("Failed to fetch Linkedin company profile", company_username=company_username, status=response.status)

        if company_username in fetched:
            # Build a new Position rather than mutating the existing one.
            position = position.model_copy(update={"company_description": fetched[company_username]})
        new_positions.append(position)

    # Swap in the augmented list.
    profile.positions = new_positions
|
| 179 |
+
|
| 180 |
@retry(
|
| 181 |
wait=wait_random_exponential(min=1, max=60),
|
| 182 |
stop=stop_after_attempt(3),
|
|
|
|
| 207 |
async with session.get(self._URL, headers=headers, params=querystring) as response:
|
| 208 |
if response.status == 200:
|
| 209 |
data = await response.json()
|
| 210 |
+
profile = LinkedinProfile.profile_from_json(data)
|
| 211 |
else:
|
| 212 |
logger.error("Failed to fetch Linkedin profile", url=linkedin_url, status=response.status)
|
| 213 |
raise LinkedinFetchFailedError(f"Failed to fetch Linkedin profile for {linkedin_url}")
|
| 214 |
+
await self.augment_company_descriptions(session, profile)
|
| 215 |
+
return profile
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
async def main() -> None:
    """Manual smoke test: fetch one hard-coded LinkedIn profile and print it.

    Failures are reported on stdout instead of being raised.
    """
    # Initialize the LinkedinDownloader
    downloader = LinkedinDownloader()

    # LinkedIn profile URL to fetch
    linkedin_url = "https://www.linkedin.com/in/lauren-hipple-84277373/"

    try:
        # Fetch the LinkedIn profile data
        profile = await downloader.fetch_linkedin_data(linkedin_url)
    except LinkedinFetchFailedError as e:
        print(f"Error: {e}")
    except Exception as e:
        # Top-level boundary of a manual script: report the failure and exit.
        print(f"An unexpected error occurred: {e}")
    else:
        # Show the result; previously the fetched profile was discarded.
        print(profile)


if __name__ == "__main__":
    asyncio.run(main())
|
tests/vsp/app/test_main.py
DELETED
|
@@ -1,206 +0,0 @@
|
|
| 1 |
-
from unittest.mock import AsyncMock, patch
|
| 2 |
-
|
| 3 |
-
import pytest
|
| 4 |
-
|
| 5 |
-
from vsp.app.classifiers.education_classifier import EducationClassification, SchoolType
|
| 6 |
-
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
|
| 7 |
-
PrimaryJobType,
|
| 8 |
-
SecondaryJobType,
|
| 9 |
-
WorkExperienceClassification,
|
| 10 |
-
)
|
| 11 |
-
from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
|
| 12 |
-
InvestingFocusAssetClass,
|
| 13 |
-
InvestingFocusAssetClassClassification,
|
| 14 |
-
)
|
| 15 |
-
from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
|
| 16 |
-
InvestingFocusSector,
|
| 17 |
-
InvestingFocusSectorClassification,
|
| 18 |
-
)
|
| 19 |
-
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 20 |
-
InvestmentBankingGroup,
|
| 21 |
-
InvestmentBankingGroupClassification,
|
| 22 |
-
)
|
| 23 |
-
from vsp.app.main import LinkedinProfileClassificationResults, process_linkedin_profile
|
| 24 |
-
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
@pytest.fixture
|
| 28 |
-
def sample_linkedin_profile():
|
| 29 |
-
return LinkedinProfile(
|
| 30 |
-
first_name="John",
|
| 31 |
-
last_name="Doe",
|
| 32 |
-
educations=[
|
| 33 |
-
Education(
|
| 34 |
-
school_name="Test University",
|
| 35 |
-
degree="MBA",
|
| 36 |
-
field_of_study="Business",
|
| 37 |
-
)
|
| 38 |
-
],
|
| 39 |
-
position=[
|
| 40 |
-
Position(
|
| 41 |
-
title="Investment Banking Analyst",
|
| 42 |
-
company_name="Bank Corp",
|
| 43 |
-
),
|
| 44 |
-
Position(
|
| 45 |
-
title="Investment Associate",
|
| 46 |
-
company_name="VC Firm",
|
| 47 |
-
),
|
| 48 |
-
Position(
|
| 49 |
-
title="Software Engineer",
|
| 50 |
-
company_name="Tech Corp",
|
| 51 |
-
),
|
| 52 |
-
],
|
| 53 |
-
)
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
@pytest.mark.asyncio
|
| 57 |
-
async def test_process_linkedin_profile_comprehensive(sample_linkedin_profile):
|
| 58 |
-
with (
|
| 59 |
-
patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
|
| 60 |
-
patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
|
| 61 |
-
patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
|
| 62 |
-
patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
|
| 63 |
-
patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
|
| 64 |
-
):
|
| 65 |
-
mock_education_classifier.return_value.classify_education = AsyncMock(
|
| 66 |
-
return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
|
| 67 |
-
)
|
| 68 |
-
mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
|
| 69 |
-
side_effect=[
|
| 70 |
-
WorkExperienceClassification(
|
| 71 |
-
primary_job_type=PrimaryJobType.FULL_TIME,
|
| 72 |
-
secondary_job_type=SecondaryJobType.INVESTMENT_BANKING,
|
| 73 |
-
confidence=1.0,
|
| 74 |
-
reasoning="Test",
|
| 75 |
-
),
|
| 76 |
-
WorkExperienceClassification(
|
| 77 |
-
primary_job_type=PrimaryJobType.FULL_TIME,
|
| 78 |
-
secondary_job_type=SecondaryJobType.INVESTING,
|
| 79 |
-
confidence=1.0,
|
| 80 |
-
reasoning="Test",
|
| 81 |
-
),
|
| 82 |
-
WorkExperienceClassification(
|
| 83 |
-
primary_job_type=PrimaryJobType.FULL_TIME,
|
| 84 |
-
secondary_job_type=SecondaryJobType.ENGINEERING,
|
| 85 |
-
confidence=1.0,
|
| 86 |
-
reasoning="Test",
|
| 87 |
-
),
|
| 88 |
-
]
|
| 89 |
-
)
|
| 90 |
-
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock(
|
| 91 |
-
return_value=InvestmentBankingGroupClassification(
|
| 92 |
-
investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
|
| 93 |
-
)
|
| 94 |
-
)
|
| 95 |
-
mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class = AsyncMock(
|
| 96 |
-
return_value=InvestingFocusAssetClassClassification(
|
| 97 |
-
investing_focus_asset_class=InvestingFocusAssetClass.EARLY_STAGE_VC,
|
| 98 |
-
confidence=1.0,
|
| 99 |
-
reasoning="Test",
|
| 100 |
-
)
|
| 101 |
-
)
|
| 102 |
-
mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector = AsyncMock(
|
| 103 |
-
return_value=InvestingFocusSectorClassification(
|
| 104 |
-
investing_focus_sector=InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT,
|
| 105 |
-
confidence=1.0,
|
| 106 |
-
reasoning="Test",
|
| 107 |
-
)
|
| 108 |
-
)
|
| 109 |
-
|
| 110 |
-
result = await process_linkedin_profile(sample_linkedin_profile)
|
| 111 |
-
|
| 112 |
-
assert isinstance(result, LinkedinProfileClassificationResults)
|
| 113 |
-
assert len(result.classified_educations) == 1
|
| 114 |
-
assert len(result.classified_work_experiences) == 3
|
| 115 |
-
|
| 116 |
-
# Check investment banking position
|
| 117 |
-
ib_experience = result.classified_work_experiences[0]
|
| 118 |
-
assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
|
| 119 |
-
assert ib_experience.investment_banking_classification is not None
|
| 120 |
-
assert (
|
| 121 |
-
ib_experience.investment_banking_classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
|
| 122 |
-
)
|
| 123 |
-
assert ib_experience.investing_focus_asset_class_classification is None
|
| 124 |
-
assert ib_experience.investing_focus_sector_classification is None
|
| 125 |
-
|
| 126 |
-
# Check investing position
|
| 127 |
-
investing_experience = result.classified_work_experiences[1]
|
| 128 |
-
assert investing_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTING
|
| 129 |
-
assert investing_experience.investment_banking_classification is None
|
| 130 |
-
assert investing_experience.investing_focus_asset_class_classification is not None
|
| 131 |
-
assert (
|
| 132 |
-
investing_experience.investing_focus_asset_class_classification.investing_focus_asset_class
|
| 133 |
-
== InvestingFocusAssetClass.EARLY_STAGE_VC
|
| 134 |
-
)
|
| 135 |
-
assert investing_experience.investing_focus_sector_classification is not None
|
| 136 |
-
assert (
|
| 137 |
-
investing_experience.investing_focus_sector_classification.investing_focus_sector
|
| 138 |
-
== InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT
|
| 139 |
-
)
|
| 140 |
-
|
| 141 |
-
# Check engineering position
|
| 142 |
-
eng_experience = result.classified_work_experiences[2]
|
| 143 |
-
assert eng_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
|
| 144 |
-
assert eng_experience.investment_banking_classification is None
|
| 145 |
-
assert eng_experience.investing_focus_asset_class_classification is None
|
| 146 |
-
assert eng_experience.investing_focus_sector_classification is None
|
| 147 |
-
|
| 148 |
-
# Check that the classifiers were called the correct number of times
|
| 149 |
-
assert mock_education_classifier.return_value.classify_education.call_count == 1
|
| 150 |
-
assert mock_work_experience_classifier.return_value.classify_work_experience.call_count == 3
|
| 151 |
-
assert mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.call_count == 1
|
| 152 |
-
assert (
|
| 153 |
-
mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.call_count
|
| 154 |
-
== 1
|
| 155 |
-
)
|
| 156 |
-
assert mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.call_count == 1
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
@pytest.mark.asyncio
|
| 160 |
-
async def test_process_linkedin_profile_no_investing(sample_linkedin_profile):
|
| 161 |
-
sample_linkedin_profile.positions = [
|
| 162 |
-
Position(
|
| 163 |
-
title="Software Engineer",
|
| 164 |
-
company_name="Tech Corp",
|
| 165 |
-
)
|
| 166 |
-
]
|
| 167 |
-
|
| 168 |
-
with (
|
| 169 |
-
patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
|
| 170 |
-
patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
|
| 171 |
-
patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
|
| 172 |
-
patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
|
| 173 |
-
patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
|
| 174 |
-
):
|
| 175 |
-
mock_education_classifier.return_value.classify_education = AsyncMock(
|
| 176 |
-
return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
|
| 177 |
-
)
|
| 178 |
-
mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
|
| 179 |
-
return_value=WorkExperienceClassification(
|
| 180 |
-
primary_job_type=PrimaryJobType.FULL_TIME,
|
| 181 |
-
secondary_job_type=SecondaryJobType.ENGINEERING,
|
| 182 |
-
confidence=1.0,
|
| 183 |
-
reasoning="Test",
|
| 184 |
-
)
|
| 185 |
-
)
|
| 186 |
-
|
| 187 |
-
result = await process_linkedin_profile(sample_linkedin_profile)
|
| 188 |
-
|
| 189 |
-
assert isinstance(result, LinkedinProfileClassificationResults)
|
| 190 |
-
assert len(result.classified_educations) == 1
|
| 191 |
-
assert len(result.classified_work_experiences) == 1
|
| 192 |
-
|
| 193 |
-
work_experience = result.classified_work_experiences[0]
|
| 194 |
-
assert work_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
|
| 195 |
-
assert work_experience.investment_banking_classification is None
|
| 196 |
-
assert work_experience.investing_focus_asset_class_classification is None
|
| 197 |
-
assert work_experience.investing_focus_sector_classification is None
|
| 198 |
-
|
| 199 |
-
# ensure investment banking, investing focus asset class, and investing focus sector classifiers were not called
|
| 200 |
-
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
|
| 201 |
-
mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.assert_not_called()
|
| 202 |
-
mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.assert_not_called()
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
if __name__ == "__main__":
|
| 206 |
-
pytest.main([__file__])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|