navkast
committed on
Add investment banking classifier (#2)
Browse files
* Add investment banking classifier
* fix ipynb
- .env +8 -0
- .gitignore +0 -1
- .vscode/settings.json +3 -1
- pyproject.toml +1 -2
- run.py +6 -0
- src/notebooks/{education_classifier.ipynb → classifiers/education_classifier.ipynb} +3 -3
- src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb +81 -0
- src/notebooks/{work_experience_classifier.ipynb → classifiers/work_experience/work_experience_classifier.ipynb} +3 -10
- src/notebooks/data/eric_armagost.json +395 -0
- src/vsp/app/classifiers/__init__.py +0 -0
- src/vsp/app/{education_classifier.py → classifiers/education_classifier.py} +0 -0
- src/vsp/app/classifiers/work_experience/__init__.py +0 -0
- src/vsp/app/{work_experience_classifier.py → classifiers/work_experience/general_work_experience_classifier.py} +0 -0
- src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +170 -0
- src/vsp/app/main.py +163 -0
- src/vsp/app/model/linkedin/linkedin_models.py +14 -14
- src/vsp/app/prompts/prompt_loader.py +6 -5
- src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt +9 -0
- src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt +36 -0
- tests/app/{test_education_classifier.py → classifiers/test_education_classifier.py} +1 -1
- tests/app/classifiers/work_experience/test_investment_banking_group_classifier.py +133 -0
- tests/app/{test_work_experience_classifier.py → classifiers/work_experience/test_work_experience_classifier.py} +2 -2
- tests/app/prompts/test_prompt_loader.py +44 -9
- tests/app/prompts/test_prompts/basic_test/1 - test_user.txt +0 -1
- tests/app/prompts/test_prompts/basic_test/nested/1 - nested_test_human.txt +1 -0
.env
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### AIDER
|
| 2 |
+
|
| 3 |
+
AIDER_TEST_CMD=make test
|
| 4 |
+
AIDER_LINT_CMD=make format
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
### PYTHON
|
| 8 |
+
PYTHONPATH=src
|
.gitignore
CHANGED
|
@@ -122,7 +122,6 @@ celerybeat.pid
|
|
| 122 |
*.sage.py
|
| 123 |
|
| 124 |
# Environments
|
| 125 |
-
.env
|
| 126 |
.venv
|
| 127 |
env/
|
| 128 |
venv/
|
|
|
|
| 122 |
*.sage.py
|
| 123 |
|
| 124 |
# Environments
|
|
|
|
| 125 |
.venv
|
| 126 |
env/
|
| 127 |
venv/
|
.vscode/settings.json
CHANGED
|
@@ -12,5 +12,7 @@
|
|
| 12 |
"tests"
|
| 13 |
],
|
| 14 |
"python.testing.unittestEnabled": false,
|
| 15 |
-
"python.testing.pytestEnabled": true
|
|
|
|
|
|
|
| 16 |
}
|
|
|
|
| 12 |
"tests"
|
| 13 |
],
|
| 14 |
"python.testing.unittestEnabled": false,
|
| 15 |
+
"python.testing.pytestEnabled": true,
|
| 16 |
+
"python.envFile": "${workspaceFolder}/.env",
|
| 17 |
+
"python.analysis.extraPaths": ["${workspaceFolder}/src"]
|
| 18 |
}
|
pyproject.toml
CHANGED
|
@@ -32,7 +32,7 @@ dev = [
|
|
| 32 |
]
|
| 33 |
|
| 34 |
[tool.hatch.build.targets.wheel]
|
| 35 |
-
packages = ["src/
|
| 36 |
|
| 37 |
[tool.ruff]
|
| 38 |
lint.select = ["E", "F", "I", "N"]
|
|
@@ -57,7 +57,6 @@ namespace_packages = true
|
|
| 57 |
explicit_package_bases = true
|
| 58 |
enable_incomplete_feature = ["NewGenericSyntax"]
|
| 59 |
|
| 60 |
-
|
| 61 |
[build-system]
|
| 62 |
requires = ["hatchling"]
|
| 63 |
build-backend = "hatchling.build"
|
|
|
|
| 32 |
]
|
| 33 |
|
| 34 |
[tool.hatch.build.targets.wheel]
|
| 35 |
+
packages = ["src/vsp"]
|
| 36 |
|
| 37 |
[tool.ruff]
|
| 38 |
lint.select = ["E", "F", "I", "N"]
|
|
|
|
| 57 |
explicit_package_bases = true
|
| 58 |
enable_incomplete_feature = ["NewGenericSyntax"]
|
| 59 |
|
|
|
|
| 60 |
[build-system]
|
| 61 |
requires = ["hatchling"]
|
| 62 |
build-backend = "hatchling.build"
|
run.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
from vsp.app.main import main
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
|
| 6 |
+
asyncio.run(main())
|
src/notebooks/{education_classifier.ipynb → classifiers/education_classifier.ipynb}
RENAMED
|
@@ -8,13 +8,13 @@
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"\n",
|
| 11 |
-
"from vsp.app.model.linkedin.linkedin_models import
|
| 12 |
"\n",
|
| 13 |
"with open(\"data/hansae_catlett.json\") as f:\n",
|
| 14 |
" data = json.load(f)\n",
|
| 15 |
" # convert to linkedin profile\n",
|
| 16 |
"\n",
|
| 17 |
-
"profile = profile_from_json(data)"
|
| 18 |
]
|
| 19 |
},
|
| 20 |
{
|
|
@@ -80,7 +80,7 @@
|
|
| 80 |
"source": [
|
| 81 |
"import asyncio\n",
|
| 82 |
"\n",
|
| 83 |
-
"from vsp.app.education_classifier import EducationClassifier\n",
|
| 84 |
"\n",
|
| 85 |
"education_classifier = EducationClassifier()\n",
|
| 86 |
"\n",
|
|
|
|
| 8 |
"source": [
|
| 9 |
"import json\n",
|
| 10 |
"\n",
|
| 11 |
+
"from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
|
| 12 |
"\n",
|
| 13 |
"with open(\"data/hansae_catlett.json\") as f:\n",
|
| 14 |
" data = json.load(f)\n",
|
| 15 |
" # convert to linkedin profile\n",
|
| 16 |
"\n",
|
| 17 |
+
"profile = LinkedinProfile.profile_from_json(data)"
|
| 18 |
]
|
| 19 |
},
|
| 20 |
{
|
|
|
|
| 80 |
"source": [
|
| 81 |
"import asyncio\n",
|
| 82 |
"\n",
|
| 83 |
+
"from vsp.app.classifiers.education_classifier import EducationClassifier\n",
|
| 84 |
"\n",
|
| 85 |
"education_classifier = EducationClassifier()\n",
|
| 86 |
"\n",
|
src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"os.getcwd()\n",
|
| 12 |
+
"os.chdir(path=os.getcwd() + \"/../../../\")"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": null,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"import json\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"print(os.getcwd())\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"with open(\"notebooks/data/eric_armagost.json\") as f:\n",
|
| 28 |
+
" data = json.load(f)\n",
|
| 29 |
+
"\n",
|
| 30 |
+
"profile = LinkedinProfile.profile_from_json(data)"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"execution_count": null,
|
| 36 |
+
"metadata": {},
|
| 37 |
+
"outputs": [],
|
| 38 |
+
"source": [
|
| 39 |
+
"import asyncio\n",
|
| 40 |
+
"\n",
|
| 41 |
+
"from vsp.app.classifiers.work_experience.investment_banking_group_classifier import InvestmentBankingGroupClassifier\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"investment_banking_classifier = InvestmentBankingGroupClassifier()\n",
|
| 44 |
+
"\n",
|
| 45 |
+
"all_positions_classified = []\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"async def classify_investment_banking_group(profile, position):\n",
|
| 49 |
+
" classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)\n",
|
| 50 |
+
" all_positions_classified.append(classification)\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"await asyncio.gather(*[classify_investment_banking_group(profile, position) for position in profile.positions])\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"for classification in all_positions_classified:\n",
|
| 56 |
+
" print(classification.model_dump_json(indent=2))"
|
| 57 |
+
]
|
| 58 |
+
}
|
| 59 |
+
],
|
| 60 |
+
"metadata": {
|
| 61 |
+
"kernelspec": {
|
| 62 |
+
"display_name": ".venv",
|
| 63 |
+
"language": "python",
|
| 64 |
+
"name": "python3"
|
| 65 |
+
},
|
| 66 |
+
"language_info": {
|
| 67 |
+
"codemirror_mode": {
|
| 68 |
+
"name": "ipython",
|
| 69 |
+
"version": 3
|
| 70 |
+
},
|
| 71 |
+
"file_extension": ".py",
|
| 72 |
+
"mimetype": "text/x-python",
|
| 73 |
+
"name": "python",
|
| 74 |
+
"nbconvert_exporter": "python",
|
| 75 |
+
"pygments_lexer": "ipython3",
|
| 76 |
+
"version": "3.12.5"
|
| 77 |
+
}
|
| 78 |
+
},
|
| 79 |
+
"nbformat": 4,
|
| 80 |
+
"nbformat_minor": 2
|
| 81 |
+
}
|
src/notebooks/{work_experience_classifier.ipynb → classifiers/work_experience/work_experience_classifier.ipynb}
RENAMED
|
@@ -12,13 +12,6 @@
|
|
| 12 |
"os.chdir(path=os.getcwd() + \"/../\")"
|
| 13 |
]
|
| 14 |
},
|
| 15 |
-
{
|
| 16 |
-
"cell_type": "code",
|
| 17 |
-
"execution_count": null,
|
| 18 |
-
"metadata": {},
|
| 19 |
-
"outputs": [],
|
| 20 |
-
"source": []
|
| 21 |
-
},
|
| 22 |
{
|
| 23 |
"cell_type": "code",
|
| 24 |
"execution_count": 2,
|
|
@@ -27,13 +20,13 @@
|
|
| 27 |
"source": [
|
| 28 |
"import json\n",
|
| 29 |
"\n",
|
| 30 |
-
"from vsp.app.model.linkedin.linkedin_models import
|
| 31 |
"\n",
|
| 32 |
"with open(\"notebooks/data/hansae_catlett.json\") as f:\n",
|
| 33 |
" data = json.load(f)\n",
|
| 34 |
" # convert to linkedin profile\n",
|
| 35 |
"\n",
|
| 36 |
-
"profile = profile_from_json(data)"
|
| 37 |
]
|
| 38 |
},
|
| 39 |
{
|
|
@@ -136,7 +129,7 @@
|
|
| 136 |
"source": [
|
| 137 |
"import asyncio\n",
|
| 138 |
"\n",
|
| 139 |
-
"from vsp.app.
|
| 140 |
"\n",
|
| 141 |
"work_experience_classifier = WorkExperienceClassifier()\n",
|
| 142 |
"\n",
|
|
|
|
| 12 |
"os.chdir(path=os.getcwd() + \"/../\")"
|
| 13 |
]
|
| 14 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
{
|
| 16 |
"cell_type": "code",
|
| 17 |
"execution_count": 2,
|
|
|
|
| 20 |
"source": [
|
| 21 |
"import json\n",
|
| 22 |
"\n",
|
| 23 |
+
"from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
|
| 24 |
"\n",
|
| 25 |
"with open(\"notebooks/data/hansae_catlett.json\") as f:\n",
|
| 26 |
" data = json.load(f)\n",
|
| 27 |
" # convert to linkedin profile\n",
|
| 28 |
"\n",
|
| 29 |
+
"profile = LinkedinProfile.profile_from_json(data)"
|
| 30 |
]
|
| 31 |
},
|
| 32 |
{
|
|
|
|
| 129 |
"source": [
|
| 130 |
"import asyncio\n",
|
| 131 |
"\n",
|
| 132 |
+
"from vsp.app.classifiers.work_experience.general_work_experience_classifier import WorkExperienceClassifier\n",
|
| 133 |
"\n",
|
| 134 |
"work_experience_classifier = WorkExperienceClassifier()\n",
|
| 135 |
"\n",
|
src/notebooks/data/eric_armagost.json
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"urn": "ACoAAApkrrAB8nFEIP2l00BAXgSQ78iUNprebWc",
|
| 3 |
+
"username": "eric-armagost-a144904a",
|
| 4 |
+
"firstName": "Eric",
|
| 5 |
+
"lastName": "Armagost",
|
| 6 |
+
"isCreator": false,
|
| 7 |
+
"isOpenToWork": false,
|
| 8 |
+
"isHiring": false,
|
| 9 |
+
"profilePicture": "https://media.licdn.com/dms/image/v2/C5603AQGiv3LeddNxgQ/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1656093036751?e=1730332800&v=beta&t=HruxzTkWpJZ9iro3k20ZKxPXHBerz1altnRU3PPXdUI",
|
| 10 |
+
"backgroundImage": null,
|
| 11 |
+
"summary": "",
|
| 12 |
+
"headline": "Investor at Accel-KKR",
|
| 13 |
+
"geo": {
|
| 14 |
+
"country": "United States",
|
| 15 |
+
"city": "San Francisco, California",
|
| 16 |
+
"full": "San Francisco, California, United States"
|
| 17 |
+
},
|
| 18 |
+
"languages": [
|
| 19 |
+
{
|
| 20 |
+
"name": "German",
|
| 21 |
+
"proficiency": "LIMITED_WORKING"
|
| 22 |
+
}
|
| 23 |
+
],
|
| 24 |
+
"educations": [
|
| 25 |
+
{
|
| 26 |
+
"start": {
|
| 27 |
+
"year": 0,
|
| 28 |
+
"month": 0,
|
| 29 |
+
"day": 0
|
| 30 |
+
},
|
| 31 |
+
"end": {
|
| 32 |
+
"year": 0,
|
| 33 |
+
"month": 0,
|
| 34 |
+
"day": 0
|
| 35 |
+
},
|
| 36 |
+
"fieldOfStudy": "Business Economics",
|
| 37 |
+
"degree": "Bachelor of Arts (B.A.)",
|
| 38 |
+
"grade": "",
|
| 39 |
+
"schoolName": "Brown University",
|
| 40 |
+
"description": "",
|
| 41 |
+
"activities": "",
|
| 42 |
+
"url": "https://www.linkedin.com/school/brown-university/",
|
| 43 |
+
"schoolId": "157343"
|
| 44 |
+
}
|
| 45 |
+
],
|
| 46 |
+
"position": [
|
| 47 |
+
{
|
| 48 |
+
"companyId": 57752,
|
| 49 |
+
"companyName": "Accel-KKR",
|
| 50 |
+
"companyUsername": "accel-kkr",
|
| 51 |
+
"companyURL": "https://www.linkedin.com/company/accel-kkr/",
|
| 52 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHJ-Smp3x90Yg/company-logo_400_400/company-logo_400_400/0/1630565829245/accel_kkr_logo?e=1733356800&v=beta&t=MuqS5XhM4c0BNCvk0cCsWIE5YzOWMu7HaFpld467P0w",
|
| 53 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 54 |
+
"companyStaffCountRange": "51 - 200",
|
| 55 |
+
"title": "Investment Professional",
|
| 56 |
+
"multiLocaleTitle": {
|
| 57 |
+
"en_US": "Investment Professional"
|
| 58 |
+
},
|
| 59 |
+
"multiLocaleCompanyName": {
|
| 60 |
+
"en_US": "Accel-KKR"
|
| 61 |
+
},
|
| 62 |
+
"location": "Menlo Park, California",
|
| 63 |
+
"description": "Founded in 2000, Accel-KKR is a leading technology-focused private equity firm dedicated exclusively to investing in software and technology-enabled services companies. \n\nOur typical transactions include: \n\u2022 Acquisitions and recapitalizations of founder-owned or closely-held private companies \n\u2022 Buyouts of divisions, subsidiaries and business units from public companies \n\u2022 Take-private transactions of small public companies\n\u2022 Structured minority equity and debt investments\n\nwww.accel-kkr.com",
|
| 64 |
+
"employmentType": "",
|
| 65 |
+
"start": {
|
| 66 |
+
"year": 2017,
|
| 67 |
+
"month": 5,
|
| 68 |
+
"day": 0
|
| 69 |
+
},
|
| 70 |
+
"end": {
|
| 71 |
+
"year": 0,
|
| 72 |
+
"month": 0,
|
| 73 |
+
"day": 0
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"companyId": 166939,
|
| 78 |
+
"companyName": "William Blair & Company",
|
| 79 |
+
"companyUsername": "william-blair-company",
|
| 80 |
+
"companyURL": "https://www.linkedin.com/company/william-blair-company/",
|
| 81 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQE63WLd1DEgkw/company-logo_400_400/company-logo_400_400/0/1669143293900/william_blair__company_logo?e=1733356800&v=beta&t=lpEb9QzV4aUl3rBk_cyFI2qbyc3fVHQohUoO7kju8PM",
|
| 82 |
+
"companyIndustry": "Financial Services",
|
| 83 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 84 |
+
"title": "Investment Banking",
|
| 85 |
+
"multiLocaleTitle": {
|
| 86 |
+
"en_US": "Investment Banking"
|
| 87 |
+
},
|
| 88 |
+
"multiLocaleCompanyName": {
|
| 89 |
+
"en_US": "William Blair & Company"
|
| 90 |
+
},
|
| 91 |
+
"location": "Chicago",
|
| 92 |
+
"description": "",
|
| 93 |
+
"employmentType": "",
|
| 94 |
+
"start": {
|
| 95 |
+
"year": 2015,
|
| 96 |
+
"month": 6,
|
| 97 |
+
"day": 0
|
| 98 |
+
},
|
| 99 |
+
"end": {
|
| 100 |
+
"year": 2017,
|
| 101 |
+
"month": 5,
|
| 102 |
+
"day": 0
|
| 103 |
+
}
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"companyId": 1307,
|
| 107 |
+
"companyName": "Fidelity Investments",
|
| 108 |
+
"companyUsername": "fidelity-investments",
|
| 109 |
+
"companyURL": "https://www.linkedin.com/company/fidelity-investments/",
|
| 110 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
|
| 111 |
+
"companyIndustry": "Financial Services",
|
| 112 |
+
"companyStaffCountRange": "10001 - 0",
|
| 113 |
+
"title": "FFAS Corporate Finance",
|
| 114 |
+
"multiLocaleTitle": {
|
| 115 |
+
"en_US": "FFAS Corporate Finance"
|
| 116 |
+
},
|
| 117 |
+
"multiLocaleCompanyName": {
|
| 118 |
+
"en_US": "Fidelity Investments"
|
| 119 |
+
},
|
| 120 |
+
"location": "Smithfield, RI",
|
| 121 |
+
"description": "",
|
| 122 |
+
"employmentType": "",
|
| 123 |
+
"start": {
|
| 124 |
+
"year": 2014,
|
| 125 |
+
"month": 6,
|
| 126 |
+
"day": 0
|
| 127 |
+
},
|
| 128 |
+
"end": {
|
| 129 |
+
"year": 2015,
|
| 130 |
+
"month": 5,
|
| 131 |
+
"day": 0
|
| 132 |
+
}
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"companyId": 1307,
|
| 136 |
+
"companyName": "Fidelity Investments",
|
| 137 |
+
"companyUsername": "fidelity-investments",
|
| 138 |
+
"companyURL": "https://www.linkedin.com/company/fidelity-investments/",
|
| 139 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
|
| 140 |
+
"companyIndustry": "Financial Services",
|
| 141 |
+
"companyStaffCountRange": "10001 - 0",
|
| 142 |
+
"title": "PI Corporate Finance",
|
| 143 |
+
"multiLocaleTitle": {
|
| 144 |
+
"en_US": "PI Corporate Finance"
|
| 145 |
+
},
|
| 146 |
+
"multiLocaleCompanyName": {
|
| 147 |
+
"en_US": "Fidelity Investments"
|
| 148 |
+
},
|
| 149 |
+
"location": "Smithfield, RI",
|
| 150 |
+
"description": "",
|
| 151 |
+
"employmentType": "",
|
| 152 |
+
"start": {
|
| 153 |
+
"year": 2013,
|
| 154 |
+
"month": 6,
|
| 155 |
+
"day": 0
|
| 156 |
+
},
|
| 157 |
+
"end": {
|
| 158 |
+
"year": 2014,
|
| 159 |
+
"month": 5,
|
| 160 |
+
"day": 0
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
],
|
| 164 |
+
"fullPositions": [
|
| 165 |
+
{
|
| 166 |
+
"companyId": 57752,
|
| 167 |
+
"companyName": "Accel-KKR",
|
| 168 |
+
"companyUsername": "accel-kkr",
|
| 169 |
+
"companyURL": "https://www.linkedin.com/company/accel-kkr/",
|
| 170 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHJ-Smp3x90Yg/company-logo_400_400/company-logo_400_400/0/1630565829245/accel_kkr_logo?e=1733356800&v=beta&t=MuqS5XhM4c0BNCvk0cCsWIE5YzOWMu7HaFpld467P0w",
|
| 171 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 172 |
+
"companyStaffCountRange": "51 - 200",
|
| 173 |
+
"title": "Investment Professional",
|
| 174 |
+
"multiLocaleTitle": {
|
| 175 |
+
"en_US": "Investment Professional"
|
| 176 |
+
},
|
| 177 |
+
"multiLocaleCompanyName": {
|
| 178 |
+
"en_US": "Accel-KKR"
|
| 179 |
+
},
|
| 180 |
+
"location": "Menlo Park, California",
|
| 181 |
+
"description": "Founded in 2000, Accel-KKR is a leading technology-focused private equity firm dedicated exclusively to investing in software and technology-enabled services companies. \n\nOur typical transactions include: \n\u2022 Acquisitions and recapitalizations of founder-owned or closely-held private companies \n\u2022 Buyouts of divisions, subsidiaries and business units from public companies \n\u2022 Take-private transactions of small public companies\n\u2022 Structured minority equity and debt investments\n\nwww.accel-kkr.com",
|
| 182 |
+
"employmentType": "",
|
| 183 |
+
"start": {
|
| 184 |
+
"year": 2017,
|
| 185 |
+
"month": 5,
|
| 186 |
+
"day": 0
|
| 187 |
+
},
|
| 188 |
+
"end": {
|
| 189 |
+
"year": 0,
|
| 190 |
+
"month": 0,
|
| 191 |
+
"day": 0
|
| 192 |
+
}
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"companyId": 166939,
|
| 196 |
+
"companyName": "William Blair & Company",
|
| 197 |
+
"companyUsername": "william-blair-company",
|
| 198 |
+
"companyURL": "https://www.linkedin.com/company/william-blair-company/",
|
| 199 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQE63WLd1DEgkw/company-logo_400_400/company-logo_400_400/0/1669143293900/william_blair__company_logo?e=1733356800&v=beta&t=lpEb9QzV4aUl3rBk_cyFI2qbyc3fVHQohUoO7kju8PM",
|
| 200 |
+
"companyIndustry": "Financial Services",
|
| 201 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 202 |
+
"title": "Investment Banking",
|
| 203 |
+
"multiLocaleTitle": {
|
| 204 |
+
"en_US": "Investment Banking"
|
| 205 |
+
},
|
| 206 |
+
"multiLocaleCompanyName": {
|
| 207 |
+
"en_US": "William Blair & Company"
|
| 208 |
+
},
|
| 209 |
+
"location": "Chicago",
|
| 210 |
+
"description": "",
|
| 211 |
+
"employmentType": "",
|
| 212 |
+
"start": {
|
| 213 |
+
"year": 2015,
|
| 214 |
+
"month": 6,
|
| 215 |
+
"day": 0
|
| 216 |
+
},
|
| 217 |
+
"end": {
|
| 218 |
+
"year": 2017,
|
| 219 |
+
"month": 5,
|
| 220 |
+
"day": 0
|
| 221 |
+
}
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"companyId": 1307,
|
| 225 |
+
"companyName": "Fidelity Investments",
|
| 226 |
+
"companyUsername": "fidelity-investments",
|
| 227 |
+
"companyURL": "https://www.linkedin.com/company/fidelity-investments/",
|
| 228 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
|
| 229 |
+
"companyIndustry": "Financial Services",
|
| 230 |
+
"companyStaffCountRange": "10001 - 0",
|
| 231 |
+
"title": "FFAS Corporate Finance",
|
| 232 |
+
"multiLocaleTitle": {
|
| 233 |
+
"en_US": "FFAS Corporate Finance"
|
| 234 |
+
},
|
| 235 |
+
"multiLocaleCompanyName": {
|
| 236 |
+
"en_US": "Fidelity Investments"
|
| 237 |
+
},
|
| 238 |
+
"location": "Smithfield, RI",
|
| 239 |
+
"description": "",
|
| 240 |
+
"employmentType": "",
|
| 241 |
+
"start": {
|
| 242 |
+
"year": 2014,
|
| 243 |
+
"month": 6,
|
| 244 |
+
"day": 0
|
| 245 |
+
},
|
| 246 |
+
"end": {
|
| 247 |
+
"year": 2015,
|
| 248 |
+
"month": 5,
|
| 249 |
+
"day": 0
|
| 250 |
+
}
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"companyId": 1307,
|
| 254 |
+
"companyName": "Fidelity Investments",
|
| 255 |
+
"companyUsername": "fidelity-investments",
|
| 256 |
+
"companyURL": "https://www.linkedin.com/company/fidelity-investments/",
|
| 257 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
|
| 258 |
+
"companyIndustry": "Financial Services",
|
| 259 |
+
"companyStaffCountRange": "10001 - 0",
|
| 260 |
+
"title": "PI Corporate Finance",
|
| 261 |
+
"multiLocaleTitle": {
|
| 262 |
+
"en_US": "PI Corporate Finance"
|
| 263 |
+
},
|
| 264 |
+
"multiLocaleCompanyName": {
|
| 265 |
+
"en_US": "Fidelity Investments"
|
| 266 |
+
},
|
| 267 |
+
"location": "Smithfield, RI",
|
| 268 |
+
"description": "",
|
| 269 |
+
"employmentType": "",
|
| 270 |
+
"start": {
|
| 271 |
+
"year": 2013,
|
| 272 |
+
"month": 6,
|
| 273 |
+
"day": 0
|
| 274 |
+
},
|
| 275 |
+
"end": {
|
| 276 |
+
"year": 2014,
|
| 277 |
+
"month": 5,
|
| 278 |
+
"day": 0
|
| 279 |
+
}
|
| 280 |
+
}
|
| 281 |
+
],
|
| 282 |
+
"skills": [
|
| 283 |
+
{
|
| 284 |
+
"name": "Microsoft Office",
|
| 285 |
+
"passedSkillAssessment": false,
|
| 286 |
+
"endorsementsCount": 5
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"name": "Microsoft Excel",
|
| 290 |
+
"passedSkillAssessment": false,
|
| 291 |
+
"endorsementsCount": 2
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"name": "Microsoft Word",
|
| 295 |
+
"passedSkillAssessment": false,
|
| 296 |
+
"endorsementsCount": 7
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"name": "Customer Service",
|
| 300 |
+
"passedSkillAssessment": false
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"name": "PowerPoint",
|
| 304 |
+
"passedSkillAssessment": false,
|
| 305 |
+
"endorsementsCount": 4
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"name": "English",
|
| 309 |
+
"passedSkillAssessment": false
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"name": "Windows",
|
| 313 |
+
"passedSkillAssessment": false
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"name": "Research",
|
| 317 |
+
"passedSkillAssessment": false,
|
| 318 |
+
"endorsementsCount": 2
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"name": "Outlook",
|
| 322 |
+
"passedSkillAssessment": false
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"name": "Teaching",
|
| 326 |
+
"passedSkillAssessment": false
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"name": "Photoshop",
|
| 330 |
+
"passedSkillAssessment": false
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"name": "Public Speaking",
|
| 334 |
+
"passedSkillAssessment": false,
|
| 335 |
+
"endorsementsCount": 2
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"name": "HTML",
|
| 339 |
+
"passedSkillAssessment": false
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"name": "Strategic Planning",
|
| 343 |
+
"passedSkillAssessment": false
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"name": "Budgets",
|
| 347 |
+
"passedSkillAssessment": false
|
| 348 |
+
}
|
| 349 |
+
],
|
| 350 |
+
"givenRecommendation": null,
|
| 351 |
+
"givenRecommendationCount": 0,
|
| 352 |
+
"receivedRecommendation": null,
|
| 353 |
+
"receivedRecommendationCount": 0,
|
| 354 |
+
"courses": null,
|
| 355 |
+
"certifications": null,
|
| 356 |
+
"honors": null,
|
| 357 |
+
"projects": {
|
| 358 |
+
"total": 0,
|
| 359 |
+
"items": null
|
| 360 |
+
},
|
| 361 |
+
"volunteering": [
|
| 362 |
+
{
|
| 363 |
+
"title": "Fundraiser Leader",
|
| 364 |
+
"start": {
|
| 365 |
+
"year": 2014,
|
| 366 |
+
"month": 7,
|
| 367 |
+
"day": 0
|
| 368 |
+
},
|
| 369 |
+
"end": {
|
| 370 |
+
"year": 2014,
|
| 371 |
+
"month": 7,
|
| 372 |
+
"day": 0
|
| 373 |
+
},
|
| 374 |
+
"companyName": "AHA",
|
| 375 |
+
"CompanyId": "",
|
| 376 |
+
"companyUrl": "",
|
| 377 |
+
"companyLogo": ""
|
| 378 |
+
}
|
| 379 |
+
],
|
| 380 |
+
"supportedLocales": [
|
| 381 |
+
{
|
| 382 |
+
"country": "US",
|
| 383 |
+
"language": "en"
|
| 384 |
+
}
|
| 385 |
+
],
|
| 386 |
+
"multiLocaleFirstName": {
|
| 387 |
+
"en": "Eric"
|
| 388 |
+
},
|
| 389 |
+
"multiLocaleLastName": {
|
| 390 |
+
"en": "Armagost"
|
| 391 |
+
},
|
| 392 |
+
"multiLocaleHeadline": {
|
| 393 |
+
"en": "Investor at Accel-KKR"
|
| 394 |
+
}
|
| 395 |
+
}
|
src/vsp/app/classifiers/__init__.py
ADDED
|
File without changes
|
src/vsp/app/{education_classifier.py → classifiers/education_classifier.py}
RENAMED
|
File without changes
|
src/vsp/app/classifiers/work_experience/__init__.py
ADDED
|
File without changes
|
src/vsp/app/{work_experience_classifier.py → classifiers/work_experience/general_work_experience_classifier.py}
RENAMED
|
File without changes
|
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
investment_banking_group_classifier.py
|
| 3 |
+
|
| 4 |
+
This module provides functionality for classifying investment banking groups
|
| 5 |
+
based on work experiences from LinkedIn profiles. It uses a language model to
|
| 6 |
+
determine the specific investment banking group a position belongs to.
|
| 7 |
+
|
| 8 |
+
Classes:
|
| 9 |
+
InvestmentBankingGroup: Enum representing different investment banking groups.
|
| 10 |
+
InvestmentBankingGroupClassification: Pydantic model for classification results.
|
| 11 |
+
InvestmentBankingGroupClassifier: Main class for classifying investment banking groups.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
classifier = InvestmentBankingGroupClassifier()
|
| 15 |
+
classification = await classifier.classify_investment_banking_group(linkedin_profile, work_experience)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from enum import Enum
|
| 19 |
+
from typing import Any, Final
|
| 20 |
+
|
| 21 |
+
from pydantic import BaseModel, Field
|
| 22 |
+
|
| 23 |
+
from vsp.app import bindings
|
| 24 |
+
from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
|
| 25 |
+
from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
|
| 26 |
+
from vsp.app.prompts.prompt_loader import PromptLoader
|
| 27 |
+
from vsp.llm.llm_service import LLMService
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InvestmentBankingGroup(str, Enum):
|
| 31 |
+
"""Enumeration of different investment banking groups."""
|
| 32 |
+
|
| 33 |
+
GENERALIST = "Generalist"
|
| 34 |
+
M_AND_A = "M&A"
|
| 35 |
+
LEVERAGED_FINANCE = "Leveraged Finance"
|
| 36 |
+
FINANCIAL_SPONSORS = "Financial Sponsors"
|
| 37 |
+
EQUITY_CAPITAL_MARKETS = "Equity Capital Markets"
|
| 38 |
+
DEBT_CAPITAL_MARKETS = "Debt Capital Markets"
|
| 39 |
+
RESTRUCTURING = "Restructuring"
|
| 40 |
+
PRIVATE_FUNDS = "Private Funds"
|
| 41 |
+
STRUCTURED_FINANCE = "Structured Finance"
|
| 42 |
+
HEALTHCARE = "Healthcare"
|
| 43 |
+
INDUSTRIALS = "Industrials"
|
| 44 |
+
BUSINESS_SERVICES = "Business Services"
|
| 45 |
+
CONSUMER_RETAIL = "Consumer & Retail"
|
| 46 |
+
ENERGY_NATURAL_RESOURCES = "Energy / Natural Resources / Cleantech / Utilities"
|
| 47 |
+
REAL_ESTATE_LODGINGS = "Real Estate, Gaming & Lodging"
|
| 48 |
+
TECHNOLOGY_MEDIA_TELECOM = "Technology / Software / TMT"
|
| 49 |
+
MEDIA_ENTERTAINMENT = "Media & Entertainment"
|
| 50 |
+
FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
|
| 51 |
+
INFRASTRUCTURE = "Infrastructure / Transportation"
|
| 52 |
+
OTHER = "Other"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
_INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
|
| 56 |
+
group.name: group for group in InvestmentBankingGroup
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class InvestmentBankingGroupClassification(BaseModel):
    """
    Result of classifying one work experience item into an investment banking group.

    Attributes:
        investment_banking_group (InvestmentBankingGroup): The group the position was assigned to.
        confidence (float): How confident the classification is; constrained to 0.0 <= value <= 1.0.
        reasoning (str): Free-text explanation of why the group was chosen.
    """

    investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
    confidence: float = Field(
        description="Confidence level between 0.0 and 1.0",
        ge=0.0,
        le=1.0,
    )
    reasoning: str = Field(description="Explanation for the classification")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class InvestmentBankingGroupClassifier:
    """
    Classify a LinkedIn work experience item into an investment banking group.

    The classifier renders a prompt from the candidate's full profile plus the single
    position under review, evaluates it with a language model, and parses the model's
    ``key: value`` reply into an InvestmentBankingGroupClassification.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The prompt template, loaded once at construction time.
        _prompt_loader (PromptLoader): The loader used to load the template and build prompts.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the InvestmentBankingGroupClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template(
            "work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier"
        )
        self._prompt_loader = prompt_loader

    @staticmethod
    def _parse_output(output: str) -> InvestmentBankingGroupClassification:
        """
        Parse the output from the language model into an InvestmentBankingGroupClassification object.

        The expected reply has one ``key: value`` line for each of
        ``investment_banking_group``, ``confidence`` and ``reasoning``. Parsing is
        tolerant of common deviations: blank lines are skipped, extra lines after
        ``reasoning`` are treated as a continuation of the reasoning text, and the
        group may be given either as an enum member name (``M_AND_A``) or as its
        human-readable label (``M&A``), case-insensitively.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            InvestmentBankingGroupClassification: A structured representation of the classification result.

        Raises:
            ValueError: If a required field is missing, the investment banking group is
                unknown, or the confidence value is not a number. A confidence outside
                0.0-1.0 is rejected by the result model's own field constraints.
        """
        expected_fields = ("investment_banking_group", "confidence", "reasoning")
        parsed: dict[str, str] = {}
        current_field: str | None = None

        for line in output.strip().splitlines():
            key, separator, value = line.partition(":")
            key = key.strip()
            if separator and key in expected_fields:
                current_field = key
                parsed[key] = value.strip()
            elif current_field == "reasoning" and line.strip():
                # Models sometimes wrap the explanation over several lines; keep all of it
                # instead of failing on a line that is not a "key: value" pair.
                parsed["reasoning"] = f"{parsed['reasoning']}\n{line.strip()}".strip()
            # Anything else (blank lines, preamble, stray text) is ignored; the
            # required-field check below still catches an unusable reply.

        missing = [field for field in expected_fields if field not in parsed]
        if missing:
            raise ValueError(f"Missing field(s) in classifier output: {', '.join(missing)}")

        raw_group = parsed["investment_banking_group"]
        investment_banking_group_str = raw_group.upper()
        investment_banking_group = _INVESTMENT_BANKING_GROUP_MAPPINGS.get(investment_banking_group_str)
        if investment_banking_group is None:
            # Fall back to matching the human-readable label, e.g. "M&A" or "Leveraged Finance".
            label = raw_group.casefold()
            investment_banking_group = next(
                (group for group in InvestmentBankingGroup if group.value.casefold() == label),
                None,
            )
        if investment_banking_group is None:
            raise ValueError(f"Unknown investment banking group: {investment_banking_group_str!r}")

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        return InvestmentBankingGroupClassification(
            investment_banking_group=investment_banking_group,
            confidence=confidence,
            reasoning=parsed["reasoning"],
        )

    async def classify_investment_banking_group(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> InvestmentBankingGroupClassification:
        """
        Classify a single work experience item from a LinkedIn profile into an investment banking group.

        The profile is formatted as a resume and the position as a work experience
        entry; both are substituted into the prompt template, and the prompt is
        evaluated with ``_parse_output`` as its output formatter.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            InvestmentBankingGroupClassification: The classification result for the work experience item.

        Raises:
            ValueError: Presumably when the model reply cannot be parsed by ``_parse_output``;
                whether prompt evaluation propagates that error is not visible here (TODO confirm).
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
|
src/vsp/app/main.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
main.py
|
| 3 |
+
|
| 4 |
+
This module provides functionality for processing and classifying LinkedIn profiles.
|
| 5 |
+
It uses various classifiers to analyze education and work experience data,
|
| 6 |
+
including specific classifications for investment banking roles.
|
| 7 |
+
|
| 8 |
+
The main components are:
|
| 9 |
+
1. Data models for classified education and work experience.
|
| 10 |
+
2. A function to process a LinkedIn profile and generate classification results.
|
| 11 |
+
3. An example usage demonstrating how to use the module with a JSON file input.
|
| 12 |
+
|
| 13 |
+
This module leverages asyncio for concurrent processing of profile data.
|
| 14 |
+
|
| 15 |
+
Classes:
|
| 16 |
+
ClassifiedEducation: Represents a classified education item.
|
| 17 |
+
ClassifiedWorkExperience: Represents a classified work experience item.
|
| 18 |
+
LinkedinProfileClassificationResults: Holds the classification results for a LinkedIn profile.
|
| 19 |
+
|
| 20 |
+
Functions:
|
| 21 |
+
process_linkedin_profile: Asynchronously processes a LinkedIn profile and returns classification results.
|
| 22 |
+
main: An example async function demonstrating how to use the module.
|
| 23 |
+
|
| 24 |
+
Usage:
|
| 25 |
+
This script can be run directly to process a sample LinkedIn profile:
|
| 26 |
+
$ python main.py
|
| 27 |
+
|
| 28 |
+
Or the `process_linkedin_profile` function can be imported and used in other modules.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import asyncio
|
| 32 |
+
import json
|
| 33 |
+
from typing import Sequence
|
| 34 |
+
|
| 35 |
+
from pydantic import BaseModel, Field
|
| 36 |
+
|
| 37 |
+
from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
|
| 38 |
+
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
|
| 39 |
+
SecondaryJobType,
|
| 40 |
+
WorkExperienceClassification,
|
| 41 |
+
WorkExperienceClassifier,
|
| 42 |
+
)
|
| 43 |
+
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 44 |
+
InvestmentBankingGroupClassification,
|
| 45 |
+
InvestmentBankingGroupClassifier,
|
| 46 |
+
)
|
| 47 |
+
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class ClassifiedEducation(BaseModel):
    """
    Pairs one education entry of a LinkedIn profile with its classification.

    Attributes:
        education (Education): The education entry exactly as it appears in the profile.
        classification (EducationClassification): The classifier's result for that entry.
    """

    education: Education
    classification: EducationClassification
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class ClassifiedWorkExperience(BaseModel):
    """
    Pairs one position of a LinkedIn profile with its classification results.

    Attributes:
        position (Position): The position exactly as it appears in the profile.
        work_experience_classification (WorkExperienceClassification): The general
            work experience classification of the position.
        investment_banking_classification (InvestmentBankingGroupClassification | None):
            The investment banking group classification; ``None`` (the default) when
            no such classification was made for the position.
    """

    position: Position
    work_experience_classification: WorkExperienceClassification
    investment_banking_classification: InvestmentBankingGroupClassification | None = None
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class LinkedinProfileClassificationResults(BaseModel):
    """
    Aggregated classification output for a whole LinkedIn profile.

    Attributes:
        classified_educations (Sequence[ClassifiedEducation]): One entry per classified
            education item; empty by default.
        classified_work_experiences (Sequence[ClassifiedWorkExperience]): One entry per
            classified work experience item; empty by default.
    """

    classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
    classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
    """
    Asynchronously process a LinkedIn profile and generate classification results.

    This function performs the following steps:
    1. Classifies all education items in the profile concurrently.
    2. Classifies all work experience items in the profile concurrently.
    3. For work experiences classified as investment banking, performs an additional
       investment banking group classification; these are also run concurrently.

    A profile whose education or position list is missing (``None``) is treated as
    having no items of that kind.

    Args:
        profile (LinkedinProfile): The LinkedIn profile to process.

    Returns:
        LinkedinProfileClassificationResults: The classification results for the profile,
            in the same order as the profile's educations and positions.
    """
    education_classifier = EducationClassifier()
    work_experience_classifier = WorkExperienceClassifier()
    investment_banking_classifier = InvestmentBankingGroupClassifier()

    # The profile's list fields can be None (profile_from_json checks positions for None);
    # normalise once so every step below can iterate and index safely.
    educations = profile.educations or []
    positions = profile.positions or []

    # Classify educations
    education_classifications = await asyncio.gather(
        *(education_classifier.classify_education(profile, education) for education in educations)
    )

    # Classify work experiences
    work_experience_classifications = await asyncio.gather(
        *(work_experience_classifier.classify_work_experience(profile, position) for position in positions)
    )

    # Classify investment banking groups for the relevant positions, all at once rather
    # than awaiting each model call in turn.
    ib_indices = [
        index
        for index, work_classification in enumerate(work_experience_classifications)
        if work_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
    ]
    ib_classifications = await asyncio.gather(
        *(
            investment_banking_classifier.classify_investment_banking_group(profile, positions[index])
            for index in ib_indices
        )
    )
    ib_by_index = dict(zip(ib_indices, ib_classifications))

    classified_work_experiences = []
    for index, (position, work_classification) in enumerate(zip(positions, work_experience_classifications)):
        classified_work_experience = ClassifiedWorkExperience(
            position=position, work_experience_classification=work_classification
        )
        # Only set the field for investment banking positions; others keep the default.
        if index in ib_by_index:
            classified_work_experience.investment_banking_classification = ib_by_index[index]
        classified_work_experiences.append(classified_work_experience)

    # Prepare the results using Pydantic models
    return LinkedinProfileClassificationResults(
        classified_educations=[
            ClassifiedEducation(education=education, classification=classification)
            for education, classification in zip(educations, education_classifications)
        ],
        classified_work_experiences=classified_work_experiences,
    )
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
async def main() -> None:
    """
    Example usage of the LinkedIn profile processing functionality.

    This function demonstrates how to:
    1. Load a LinkedIn profile from a JSON file.
    2. Process the profile using the `process_linkedin_profile` function.
    3. Print the classification results as indented JSON.

    The sample path is relative, so the script is expected to be run from the
    repository root. The function is asynchronous and should be run in an event loop.
    """
    # JSON text is UTF-8; decode it explicitly rather than with the platform default.
    with open("src/notebooks/data/eric_armagost.json", encoding="utf-8") as f:
        profile = LinkedinProfile.profile_from_json(json.load(f))

    # The file is closed before the model calls start.
    results = await process_linkedin_profile(profile)
    print(results.model_dump_json(indent=2))
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# Script entry point: run the example end-to-end in a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
src/vsp/app/model/linkedin/linkedin_models.py
CHANGED
|
@@ -129,17 +129,17 @@ class LinkedinProfile(BaseSchema):
|
|
| 129 |
courses: List[Course] | None = []
|
| 130 |
certifications: List[Certification] | None = []
|
| 131 |
|
| 132 |
-
|
| 133 |
-
def profile_from_json(json: dict[str, Any]) -> LinkedinProfile:
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 129 |
courses: List[Course] | None = []
|
| 130 |
certifications: List[Certification] | None = []
|
| 131 |
|
| 132 |
+
@staticmethod
def profile_from_json(json: dict[str, Any]) -> "LinkedinProfile":
    """
    Build a LinkedinProfile by validating the given JSON data.

    :param json: The JSON data (already decoded into a dict) to validate.
    :return: The validated profile, with ``positions`` replaced by ``full_positions``
        when both are present.
    """
    validated = LinkedinProfile.model_validate(json)

    # Nothing to reconcile unless both position lists were provided.
    # NOTE(review): a profile with full_positions but no positions is left untouched —
    # confirm that this is intended.
    if validated.full_positions is None or validated.positions is None:
        return validated

    # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
    validated.positions = validated.full_positions
    return validated
|
src/vsp/app/prompts/prompt_loader.py
CHANGED
|
@@ -104,14 +104,15 @@ class PromptLoader:
|
|
| 104 |
FileNotFoundError: If the specified prompt family directory doesn't exist.
|
| 105 |
"""
|
| 106 |
parts = full_name.split("/")
|
| 107 |
-
if len(parts)
|
| 108 |
-
raise ValueError(f"Invalid prompt name format. Expected 'family/name', got '{full_name}'")
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
| 112 |
|
| 113 |
if not prompt_dir.is_dir():
|
| 114 |
-
raise FileNotFoundError(f"Prompt
|
| 115 |
|
| 116 |
template = PromptTemplate(name)
|
| 117 |
for file in prompt_dir.glob(f"{name}_*.txt"):
|
|
|
|
| 104 |
FileNotFoundError: If the specified prompt family directory doesn't exist.
|
| 105 |
"""
|
| 106 |
parts = full_name.split("/")
|
| 107 |
+
if len(parts) < 2:
|
| 108 |
+
raise ValueError(f"Invalid prompt name format. Expected at least 'family/name', got '{full_name}'")
|
| 109 |
|
| 110 |
+
name = parts[-1]
|
| 111 |
+
nested_dirs = parts[:-1]
|
| 112 |
+
prompt_dir = self.base_path.joinpath(*nested_dirs)
|
| 113 |
|
| 114 |
if not prompt_dir.is_dir():
|
| 115 |
+
raise FileNotFoundError(f"Prompt directory not found: {prompt_dir}")
|
| 116 |
|
| 117 |
template = PromptTemplate(name)
|
| 118 |
for file in prompt_dir.glob(f"{name}_*.txt"):
|
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please classify the following investment banking work experience item based on the job candidate's full resume and the specific investment banking work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investment banking group the candidate worked in.
|
| 2 |
+
|
| 3 |
+
Full Resume:
|
| 4 |
+
{resume}
|
| 5 |
+
|
| 6 |
+
Specific Investment Banking Work Experience Item:
|
| 7 |
+
{work_experience}
|
| 8 |
+
|
| 9 |
+
Provide your best guess on the investment banking group, your confidence level (0.0 to 1.0), and your reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
|
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert investment banking recruiter. You have been asked to analyze a specific investment banking work experience item from a job candidate's LinkedIn profile. Your task is to accurately classify the investment banking group the candidate worked in.
|
| 2 |
+
|
| 3 |
+
Draw on your understanding of the investment banking industry as an expert recruiter to make the best guess. Use the provided information carefully to make an accurate classification.
|
| 4 |
+
|
| 5 |
+
Investment banking groups:
|
| 6 |
+
1. GENERALIST: A group that covers a wide range of industries and sectors.
|
| 7 |
+
2. M_AND_A: A group focused on mergers and acquisitions.
|
| 8 |
+
3. LEVERAGED_FINANCE: A group specializing in leveraged finance transactions.
|
| 9 |
+
4. FINANCIAL_SPONSORS: A group that works with private equity firms and other financial sponsors.
|
| 10 |
+
5. EQUITY_CAPITAL_MARKETS: A group focused on equity capital markets transactions.
|
| 11 |
+
6. DEBT_CAPITAL_MARKETS: A group specializing in debt capital markets transactions.
|
| 12 |
+
7. RESTRUCTURING: A group that handles restructuring and distressed situations.
|
| 13 |
+
8. PRIVATE_FUNDS: A group that works with private equity funds and other private investment vehicles.
|
| 14 |
+
9. STRUCTURED_FINANCE: A group specializing in structured finance products.
|
| 15 |
+
10. HEALTHCARE: A group focused on healthcare industry transactions.
|
| 16 |
+
11. INDUSTRIALS: A group that covers industrial sector transactions.
|
| 17 |
+
12. BUSINESS_SERVICES: A group focused on business services industry transactions.
|
| 18 |
+
13. CONSUMER_RETAIL: A group specializing in consumer and retail sector transactions.
|
| 19 |
+
14. ENERGY_NATURAL_RESOURCES: A group focused on energy, natural resources, cleantech, and utilities.
|
| 20 |
+
15. REAL_ESTATE_LODGINGS: A group specializing in real estate, gaming / casinos, and lodging transactions.
|
| 21 |
+
16. TECHNOLOGY_MEDIA_TELECOM: A group focused on technology, media, and telecommunications transactions.
|
| 22 |
+
17. MEDIA_ENTERTAINMENT: A group specializing in media and entertainment industry transactions.
|
| 23 |
+
18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions, banking, and fintech transactions.
|
| 24 |
+
19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
|
| 25 |
+
20. OTHER: Any group that doesn't fit the above categories.
|
| 26 |
+
|
| 27 |
+
Provide your response in the following format exactly:
|
| 28 |
+
|
| 29 |
+
investment_banking_group: [One of the twenty investment banking groups listed above]
|
| 30 |
+
confidence: [0.0 to 1.0]
|
| 31 |
+
reasoning: [Your explanation here]
|
| 32 |
+
|
| 33 |
+
Ensure each part of your response is on a separate line, exactly as shown above. There should be only three lines.
|
| 34 |
+
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 35 |
+
|
| 36 |
+
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
|
tests/app/{test_education_classifier.py β classifiers/test_education_classifier.py}
RENAMED
|
@@ -2,7 +2,7 @@ from unittest.mock import AsyncMock, MagicMock
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
-
from vsp.app.education_classifier import EducationClassification, EducationClassifier, SchoolType
|
| 6 |
from vsp.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile
|
| 7 |
|
| 8 |
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
+
from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier, SchoolType
|
| 6 |
from vsp.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile
|
| 7 |
|
| 8 |
|
tests/app/classifiers/work_experience/test_investment_banking_group_classifier.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from unittest.mock import AsyncMock, MagicMock
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 6 |
+
InvestmentBankingGroup,
|
| 7 |
+
InvestmentBankingGroupClassification,
|
| 8 |
+
InvestmentBankingGroupClassifier,
|
| 9 |
+
)
|
| 10 |
+
from vsp.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@pytest.fixture
def mock_llm_service():
    """Provide an AsyncMock that stands in for the LLM service."""
    llm_service = AsyncMock()
    return llm_service
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@pytest.fixture
def mock_prompt_loader():
    """Provide a prompt loader mock whose created prompt is an AsyncMock."""
    prompt_loader = MagicMock()
    # load_template is called in the classifier constructor; any object will do.
    prompt_loader.load_template.return_value = MagicMock()
    # The created prompt must be awaitable via ``evaluate``.
    prompt_loader.create_prompt.return_value = AsyncMock()
    return prompt_loader
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@pytest.fixture
def investment_banking_group_classifier(mock_llm_service, mock_prompt_loader):
    """Build the classifier under test, wired to the mocked LLM service and prompt loader."""
    classifier = InvestmentBankingGroupClassifier(
        llm_service=mock_llm_service,
        prompt_loader=mock_prompt_loader,
    )
    return classifier
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@pytest.fixture
def sample_linkedin_profile():
    """Provide a profile with a single investment banking analyst position."""
    analyst_position = Position(
        title="Investment Banking Analyst",
        company_name="Goldman Sachs",
        start=DateComponent(year=2020, month=1),
        end=DateComponent(year=2022, month=12),
    )
    return LinkedinProfile(
        first_name="John",
        last_name="Doe",
        position=[analyst_position],
    )
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@pytest.mark.asyncio
async def test_classify_investment_banking_group(
    investment_banking_group_classifier, sample_linkedin_profile, mock_prompt_loader
):
    """The classifier returns the classification produced by evaluating the prompt."""
    mock_prompt_loader.create_prompt.return_value.evaluate.return_value = InvestmentBankingGroupClassification(
        investment_banking_group=InvestmentBankingGroup.M_AND_A,
        confidence=0.95,
        reasoning="This is an M&A role based on the job description and company.",
    )

    position = sample_linkedin_profile.positions[0]
    classification = await investment_banking_group_classifier.classify_investment_banking_group(
        sample_linkedin_profile, position
    )

    assert isinstance(classification, InvestmentBankingGroupClassification)
    assert classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
    assert classification.confidence == 0.95
    assert "M&A role" in classification.reasoning
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.mark.parametrize(
    "group,expected_group,confidence,reasoning",
    [
        ("GENERALIST", InvestmentBankingGroup.GENERALIST, 0.9, "Test reasoning"),
        ("M_AND_A", InvestmentBankingGroup.M_AND_A, 0.8, "Test reasoning"),
        ("LEVERAGED_FINANCE", InvestmentBankingGroup.LEVERAGED_FINANCE, 0.7, "Test reasoning"),
        ("EQUITY_CAPITAL_MARKETS", InvestmentBankingGroup.EQUITY_CAPITAL_MARKETS, 0.95, "Test reasoning"),
        ("OTHER", InvestmentBankingGroup.OTHER, 0.6, "Test reasoning"),
    ],
)
def test_parse_output(group, expected_group, confidence, reasoning):
    """A well-formed three-line reply is parsed into the matching classification."""
    raw_reply = f"investment_banking_group: {group}\nconfidence: {confidence}\nreasoning: {reasoning}"

    classification = InvestmentBankingGroupClassifier._parse_output(raw_reply)

    assert classification.investment_banking_group == expected_group
    assert classification.confidence == confidence
    assert classification.reasoning == reasoning
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def test_parse_output_invalid_group():
    """A group name that is not an enum member is rejected with a ValueError."""
    raw_reply = "investment_banking_group: INVALID\nconfidence: 0.9\nreasoning: Test reasoning"

    with pytest.raises(ValueError, match="Unknown investment banking group"):
        InvestmentBankingGroupClassifier._parse_output(raw_reply)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_parse_output_invalid_confidence():
    """A non-numeric confidence value is rejected with a ValueError."""
    raw_reply = "investment_banking_group: M_AND_A\nconfidence: invalid\nreasoning: Test reasoning"

    with pytest.raises(ValueError, match="Invalid confidence value"):
        InvestmentBankingGroupClassifier._parse_output(raw_reply)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@pytest.mark.asyncio
async def test_classify_investment_banking_group_error_handling(
    investment_banking_group_classifier, sample_linkedin_profile, mock_prompt_loader
):
    """An error raised while evaluating the prompt propagates to the caller."""
    mock_prompt_loader.create_prompt.return_value.evaluate.side_effect = ValueError("Test error")
    position = sample_linkedin_profile.positions[0]

    with pytest.raises(ValueError, match="Test error"):
        await investment_banking_group_classifier.classify_investment_banking_group(
            sample_linkedin_profile, position
        )
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@pytest.mark.asyncio
async def test_classify_investment_banking_group_edge_cases(investment_banking_group_classifier, mock_prompt_loader):
    """Classification still works for a profile and position with only minimal data."""
    mock_prompt_loader.create_prompt.return_value.evaluate.return_value = InvestmentBankingGroupClassification(
        investment_banking_group=InvestmentBankingGroup.OTHER,
        confidence=0.5,
        reasoning="Unable to determine specific group",
    )

    # Only the bare minimum of fields is populated on both inputs.
    sparse_profile = LinkedinProfile(first_name="Jane", last_name="Smith")
    sparse_position = Position(title="Intern", company_name="Finance Corp")

    classification = await investment_banking_group_classifier.classify_investment_banking_group(
        sparse_profile, sparse_position
    )

    assert isinstance(classification, InvestmentBankingGroupClassification)
    assert classification.investment_banking_group == InvestmentBankingGroup.OTHER
    assert classification.confidence == 0.5
    assert "Unable to determine" in classification.reasoning
|
tests/app/{test_work_experience_classifier.py β classifiers/work_experience/test_work_experience_classifier.py}
RENAMED
|
@@ -2,13 +2,13 @@ from unittest.mock import AsyncMock, MagicMock
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
-
from vsp.app.
|
| 6 |
-
from vsp.app.work_experience_classifier import (
|
| 7 |
PrimaryJobType,
|
| 8 |
SecondaryJobType,
|
| 9 |
WorkExperienceClassification,
|
| 10 |
WorkExperienceClassifier,
|
| 11 |
)
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
@pytest.fixture
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
+
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
|
|
|
|
| 6 |
PrimaryJobType,
|
| 7 |
SecondaryJobType,
|
| 8 |
WorkExperienceClassification,
|
| 9 |
WorkExperienceClassifier,
|
| 10 |
)
|
| 11 |
+
from vsp.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
|
| 12 |
|
| 13 |
|
| 14 |
@pytest.fixture
|
tests/app/prompts/test_prompt_loader.py
CHANGED
|
@@ -1,20 +1,15 @@
|
|
| 1 |
from pathlib import Path
|
|
|
|
| 2 |
|
| 3 |
import pytest
|
| 4 |
|
| 5 |
from vsp.app.prompts.prompt_loader import PromptLoader
|
| 6 |
-
from vsp.llm.openai.openai import AsyncOpenAIService
|
| 7 |
-
from vsp.llm.openai.openai_model import OpenAIModel
|
| 8 |
from vsp.llm.prompt import Prompt
|
| 9 |
|
| 10 |
|
| 11 |
-
def get_test_prompts_path():
|
| 12 |
-
return Path(__file__).parent / "test_prompts"
|
| 13 |
-
|
| 14 |
-
|
| 15 |
@pytest.fixture
|
| 16 |
def prompt_loader():
|
| 17 |
-
return PromptLoader(
|
| 18 |
|
| 19 |
|
| 20 |
def test_load_template_success(prompt_loader):
|
|
@@ -45,7 +40,7 @@ def test_load_template_nonexistent(prompt_loader):
|
|
| 45 |
|
| 46 |
def test_create_prompt(prompt_loader):
|
| 47 |
template = prompt_loader.load_template("basic_test/1 - test")
|
| 48 |
-
llm_service =
|
| 49 |
prompt = prompt_loader.create_prompt(
|
| 50 |
template,
|
| 51 |
llm_service=llm_service,
|
|
@@ -66,9 +61,49 @@ def test_create_prompt(prompt_loader):
|
|
| 66 |
|
| 67 |
def test_create_prompt_with_system_prompt(prompt_loader):
|
| 68 |
template = prompt_loader.load_template("basic_test/2 - test2")
|
| 69 |
-
llm_service =
|
| 70 |
prompt = prompt_loader.create_prompt(template, llm_service=llm_service)
|
| 71 |
|
| 72 |
assert isinstance(prompt, Prompt)
|
| 73 |
assert prompt._system_prompt.get_prompt() == "This is a system prompt for test2."
|
| 74 |
assert prompt._user_prompt.get_prompt() == "This is another user prompt."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
+
from unittest.mock import AsyncMock
|
| 3 |
|
| 4 |
import pytest
|
| 5 |
|
| 6 |
from vsp.app.prompts.prompt_loader import PromptLoader
|
|
|
|
|
|
|
| 7 |
from vsp.llm.prompt import Prompt
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@pytest.fixture
|
| 11 |
def prompt_loader():
|
| 12 |
+
return PromptLoader(Path(__file__).parent / "test_prompts")
|
| 13 |
|
| 14 |
|
| 15 |
def test_load_template_success(prompt_loader):
|
|
|
|
| 40 |
|
| 41 |
def test_create_prompt(prompt_loader):
|
| 42 |
template = prompt_loader.load_template("basic_test/1 - test")
|
| 43 |
+
llm_service = AsyncMock()
|
| 44 |
prompt = prompt_loader.create_prompt(
|
| 45 |
template,
|
| 46 |
llm_service=llm_service,
|
|
|
|
| 61 |
|
| 62 |
def test_create_prompt_with_system_prompt(prompt_loader):
|
| 63 |
template = prompt_loader.load_template("basic_test/2 - test2")
|
| 64 |
+
llm_service = AsyncMock()
|
| 65 |
prompt = prompt_loader.create_prompt(template, llm_service=llm_service)
|
| 66 |
|
| 67 |
assert isinstance(prompt, Prompt)
|
| 68 |
assert prompt._system_prompt.get_prompt() == "This is a system prompt for test2."
|
| 69 |
assert prompt._user_prompt.get_prompt() == "This is another user prompt."
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# New test for nested prompts
|
| 73 |
+
def test_load_nested_template(prompt_loader):
|
| 74 |
+
template = prompt_loader.load_template("basic_test/nested/1 - nested_test")
|
| 75 |
+
assert template.name == "1 - nested_test"
|
| 76 |
+
assert template.user_prompt.strip() == "This is a nested user prompt."
|
| 77 |
+
assert template.system_prompt is None
|
| 78 |
+
assert template.partial_assistant_prompt is None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def test_load_nested_template_nonexistent(prompt_loader):
|
| 82 |
+
with pytest.raises(FileNotFoundError):
|
| 83 |
+
prompt_loader.load_template("basic_test/nonexistent_nested/1 - test")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def test_load_nested_template_invalid_format(prompt_loader):
|
| 87 |
+
with pytest.raises(ValueError, match="Invalid prompt name format"):
|
| 88 |
+
prompt_loader.load_template("invalid_format")
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def test_create_nested_prompt(prompt_loader):
|
| 92 |
+
template = prompt_loader.load_template("basic_test/nested/1 - nested_test")
|
| 93 |
+
llm_service = AsyncMock()
|
| 94 |
+
prompt = prompt_loader.create_prompt(
|
| 95 |
+
template,
|
| 96 |
+
llm_service=llm_service,
|
| 97 |
+
max_tokens=300,
|
| 98 |
+
temperature=0.5,
|
| 99 |
+
output_formatter=lambda x: {"nested_result": x},
|
| 100 |
+
variable="nested_test_value",
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
assert isinstance(prompt, Prompt)
|
| 104 |
+
assert prompt._llm_service == llm_service
|
| 105 |
+
assert prompt._max_tokens == 300
|
| 106 |
+
assert prompt._temperature == 0.5
|
| 107 |
+
assert callable(prompt._output_formatter)
|
| 108 |
+
assert prompt._user_prompt.get_prompt() == "This is a nested user prompt."
|
| 109 |
+
assert prompt._system_prompt is None
|
tests/app/prompts/test_prompts/basic_test/1 - test_user.txt
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
This is a user prompt.
|
|
|
|
|
|
tests/app/prompts/test_prompts/basic_test/nested/1 - nested_test_human.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This is a nested user prompt.
|