navkast committed on
Commit
145d03b
·
unverified ·
1 Parent(s): 77dc531

Add investment banking classifier (#2)

Browse files

* Add investment banking classifier

* fix ipynb

Files changed (25) hide show
  1. .env +8 -0
  2. .gitignore +0 -1
  3. .vscode/settings.json +3 -1
  4. pyproject.toml +1 -2
  5. run.py +6 -0
  6. src/notebooks/{education_classifier.ipynb → classifiers/education_classifier.ipynb} +3 -3
  7. src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb +81 -0
  8. src/notebooks/{work_experience_classifier.ipynb → classifiers/work_experience/work_experience_classifier.ipynb} +3 -10
  9. src/notebooks/data/eric_armagost.json +395 -0
  10. src/vsp/app/classifiers/__init__.py +0 -0
  11. src/vsp/app/{education_classifier.py → classifiers/education_classifier.py} +0 -0
  12. src/vsp/app/classifiers/work_experience/__init__.py +0 -0
  13. src/vsp/app/{work_experience_classifier.py → classifiers/work_experience/general_work_experience_classifier.py} +0 -0
  14. src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +170 -0
  15. src/vsp/app/main.py +163 -0
  16. src/vsp/app/model/linkedin/linkedin_models.py +14 -14
  17. src/vsp/app/prompts/prompt_loader.py +6 -5
  18. src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt +9 -0
  19. src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt +36 -0
  20. tests/app/{test_education_classifier.py → classifiers/test_education_classifier.py} +1 -1
  21. tests/app/classifiers/work_experience/test_investment_banking_group_classifier.py +133 -0
  22. tests/app/{test_work_experience_classifier.py → classifiers/work_experience/test_work_experience_classifier.py} +2 -2
  23. tests/app/prompts/test_prompt_loader.py +44 -9
  24. tests/app/prompts/test_prompts/basic_test/1 - test_user.txt +0 -1
  25. tests/app/prompts/test_prompts/basic_test/nested/1 - nested_test_human.txt +1 -0
.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ### AIDER
2
+
3
+ AIDER_TEST_CMD=make test
4
+ AIDER_LINT_CMD=make format
5
+
6
+
7
+ ### PYTHON
8
+ PYTHONPATH=src
.gitignore CHANGED
@@ -122,7 +122,6 @@ celerybeat.pid
122
  *.sage.py
123
 
124
  # Environments
125
- .env
126
  .venv
127
  env/
128
  venv/
 
122
  *.sage.py
123
 
124
  # Environments
 
125
  .venv
126
  env/
127
  venv/
.vscode/settings.json CHANGED
@@ -12,5 +12,7 @@
12
  "tests"
13
  ],
14
  "python.testing.unittestEnabled": false,
15
- "python.testing.pytestEnabled": true
 
 
16
  }
 
12
  "tests"
13
  ],
14
  "python.testing.unittestEnabled": false,
15
+ "python.testing.pytestEnabled": true,
16
+ "python.envFile": "${workspaceFolder}/.env",
17
+ "python.analysis.extraPaths": ["${workspaceFolder}/src"]
18
  }
pyproject.toml CHANGED
@@ -32,7 +32,7 @@ dev = [
32
  ]
33
 
34
  [tool.hatch.build.targets.wheel]
35
- packages = ["src/template"]
36
 
37
  [tool.ruff]
38
  lint.select = ["E", "F", "I", "N"]
@@ -57,7 +57,6 @@ namespace_packages = true
57
  explicit_package_bases = true
58
  enable_incomplete_feature = ["NewGenericSyntax"]
59
 
60
-
61
  [build-system]
62
  requires = ["hatchling"]
63
  build-backend = "hatchling.build"
 
32
  ]
33
 
34
  [tool.hatch.build.targets.wheel]
35
+ packages = ["src/vsp"]
36
 
37
  [tool.ruff]
38
  lint.select = ["E", "F", "I", "N"]
 
57
  explicit_package_bases = true
58
  enable_incomplete_feature = ["NewGenericSyntax"]
59
 
 
60
  [build-system]
61
  requires = ["hatchling"]
62
  build-backend = "hatchling.build"
run.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from vsp.app.main import main
4
+
5
+ if __name__ == "__main__":
6
+ asyncio.run(main())
src/notebooks/{education_classifier.ipynb → classifiers/education_classifier.ipynb} RENAMED
@@ -8,13 +8,13 @@
8
  "source": [
9
  "import json\n",
10
  "\n",
11
- "from vsp.app.model.linkedin.linkedin_models import profile_from_json\n",
12
  "\n",
13
  "with open(\"data/hansae_catlett.json\") as f:\n",
14
  " data = json.load(f)\n",
15
  " # convert to linkedin profile\n",
16
  "\n",
17
- "profile = profile_from_json(data)"
18
  ]
19
  },
20
  {
@@ -80,7 +80,7 @@
80
  "source": [
81
  "import asyncio\n",
82
  "\n",
83
- "from vsp.app.education_classifier import EducationClassifier\n",
84
  "\n",
85
  "education_classifier = EducationClassifier()\n",
86
  "\n",
 
8
  "source": [
9
  "import json\n",
10
  "\n",
11
+ "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
12
  "\n",
13
  "with open(\"data/hansae_catlett.json\") as f:\n",
14
  " data = json.load(f)\n",
15
  " # convert to linkedin profile\n",
16
  "\n",
17
+ "profile = LinkedinProfile.profile_from_json(data)"
18
  ]
19
  },
20
  {
 
80
  "source": [
81
  "import asyncio\n",
82
  "\n",
83
+ "from vsp.app.classifiers.education_classifier import EducationClassifier\n",
84
  "\n",
85
  "education_classifier = EducationClassifier()\n",
86
  "\n",
src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "\n",
11
+ "os.getcwd()\n",
12
+ "os.chdir(path=os.getcwd() + \"/../../../\")"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": null,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "import json\n",
22
+ "\n",
23
+ "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
24
+ "\n",
25
+ "print(os.getcwd())\n",
26
+ "\n",
27
+ "with open(\"notebooks/data/eric_armagost.json\") as f:\n",
28
+ " data = json.load(f)\n",
29
+ "\n",
30
+ "profile = LinkedinProfile.profile_from_json(data)"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": null,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "import asyncio\n",
40
+ "\n",
41
+ "from vsp.app.classifiers.work_experience.investment_banking_group_classifier import InvestmentBankingGroupClassifier\n",
42
+ "\n",
43
+ "investment_banking_classifier = InvestmentBankingGroupClassifier()\n",
44
+ "\n",
45
+ "all_positions_classified = []\n",
46
+ "\n",
47
+ "\n",
48
+ "async def classify_investment_banking_group(profile, position):\n",
49
+ " classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)\n",
50
+ " all_positions_classified.append(classification)\n",
51
+ "\n",
52
+ "\n",
53
+ "await asyncio.gather(*[classify_investment_banking_group(profile, position) for position in profile.positions])\n",
54
+ "\n",
55
+ "for classification in all_positions_classified:\n",
56
+ " print(classification.model_dump_json(indent=2))"
57
+ ]
58
+ }
59
+ ],
60
+ "metadata": {
61
+ "kernelspec": {
62
+ "display_name": ".venv",
63
+ "language": "python",
64
+ "name": "python3"
65
+ },
66
+ "language_info": {
67
+ "codemirror_mode": {
68
+ "name": "ipython",
69
+ "version": 3
70
+ },
71
+ "file_extension": ".py",
72
+ "mimetype": "text/x-python",
73
+ "name": "python",
74
+ "nbconvert_exporter": "python",
75
+ "pygments_lexer": "ipython3",
76
+ "version": "3.12.5"
77
+ }
78
+ },
79
+ "nbformat": 4,
80
+ "nbformat_minor": 2
81
+ }
src/notebooks/{work_experience_classifier.ipynb → classifiers/work_experience/work_experience_classifier.ipynb} RENAMED
@@ -12,13 +12,6 @@
12
  "os.chdir(path=os.getcwd() + \"/../\")"
13
  ]
14
  },
15
- {
16
- "cell_type": "code",
17
- "execution_count": null,
18
- "metadata": {},
19
- "outputs": [],
20
- "source": []
21
- },
22
  {
23
  "cell_type": "code",
24
  "execution_count": 2,
@@ -27,13 +20,13 @@
27
  "source": [
28
  "import json\n",
29
  "\n",
30
- "from vsp.app.model.linkedin.linkedin_models import profile_from_json\n",
31
  "\n",
32
  "with open(\"notebooks/data/hansae_catlett.json\") as f:\n",
33
  " data = json.load(f)\n",
34
  " # convert to linkedin profile\n",
35
  "\n",
36
- "profile = profile_from_json(data)"
37
  ]
38
  },
39
  {
@@ -136,7 +129,7 @@
136
  "source": [
137
  "import asyncio\n",
138
  "\n",
139
- "from vsp.app.work_experience_classifier import WorkExperienceClassifier\n",
140
  "\n",
141
  "work_experience_classifier = WorkExperienceClassifier()\n",
142
  "\n",
 
12
  "os.chdir(path=os.getcwd() + \"/../\")"
13
  ]
14
  },
 
 
 
 
 
 
 
15
  {
16
  "cell_type": "code",
17
  "execution_count": 2,
 
20
  "source": [
21
  "import json\n",
22
  "\n",
23
+ "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
24
  "\n",
25
  "with open(\"notebooks/data/hansae_catlett.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
29
+ "profile = LinkedinProfile.profile_from_json(data)"
30
  ]
31
  },
32
  {
 
129
  "source": [
130
  "import asyncio\n",
131
  "\n",
132
+ "from vsp.app.classifiers.work_experience.general_work_experience_classifier import WorkExperienceClassifier\n",
133
  "\n",
134
  "work_experience_classifier = WorkExperienceClassifier()\n",
135
  "\n",
src/notebooks/data/eric_armagost.json ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "urn": "ACoAAApkrrAB8nFEIP2l00BAXgSQ78iUNprebWc",
3
+ "username": "eric-armagost-a144904a",
4
+ "firstName": "Eric",
5
+ "lastName": "Armagost",
6
+ "isCreator": false,
7
+ "isOpenToWork": false,
8
+ "isHiring": false,
9
+ "profilePicture": "https://media.licdn.com/dms/image/v2/C5603AQGiv3LeddNxgQ/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1656093036751?e=1730332800&v=beta&t=HruxzTkWpJZ9iro3k20ZKxPXHBerz1altnRU3PPXdUI",
10
+ "backgroundImage": null,
11
+ "summary": "",
12
+ "headline": "Investor at Accel-KKR",
13
+ "geo": {
14
+ "country": "United States",
15
+ "city": "San Francisco, California",
16
+ "full": "San Francisco, California, United States"
17
+ },
18
+ "languages": [
19
+ {
20
+ "name": "German",
21
+ "proficiency": "LIMITED_WORKING"
22
+ }
23
+ ],
24
+ "educations": [
25
+ {
26
+ "start": {
27
+ "year": 0,
28
+ "month": 0,
29
+ "day": 0
30
+ },
31
+ "end": {
32
+ "year": 0,
33
+ "month": 0,
34
+ "day": 0
35
+ },
36
+ "fieldOfStudy": "Business Economics",
37
+ "degree": "Bachelor of Arts (B.A.)",
38
+ "grade": "",
39
+ "schoolName": "Brown University",
40
+ "description": "",
41
+ "activities": "",
42
+ "url": "https://www.linkedin.com/school/brown-university/",
43
+ "schoolId": "157343"
44
+ }
45
+ ],
46
+ "position": [
47
+ {
48
+ "companyId": 57752,
49
+ "companyName": "Accel-KKR",
50
+ "companyUsername": "accel-kkr",
51
+ "companyURL": "https://www.linkedin.com/company/accel-kkr/",
52
+ "companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHJ-Smp3x90Yg/company-logo_400_400/company-logo_400_400/0/1630565829245/accel_kkr_logo?e=1733356800&v=beta&t=MuqS5XhM4c0BNCvk0cCsWIE5YzOWMu7HaFpld467P0w",
53
+ "companyIndustry": "Venture Capital & Private Equity",
54
+ "companyStaffCountRange": "51 - 200",
55
+ "title": "Investment Professional",
56
+ "multiLocaleTitle": {
57
+ "en_US": "Investment Professional"
58
+ },
59
+ "multiLocaleCompanyName": {
60
+ "en_US": "Accel-KKR"
61
+ },
62
+ "location": "Menlo Park, California",
63
+ "description": "Founded in 2000, Accel-KKR is a leading technology-focused private equity firm dedicated exclusively to investing in software and technology-enabled services companies. \n\nOur typical transactions include: \n\u2022 Acquisitions and recapitalizations of founder-owned or closely-held private companies \n\u2022 Buyouts of divisions, subsidiaries and business units from public companies \n\u2022 Take-private transactions of small public companies\n\u2022 Structured minority equity and debt investments\n\nwww.accel-kkr.com",
64
+ "employmentType": "",
65
+ "start": {
66
+ "year": 2017,
67
+ "month": 5,
68
+ "day": 0
69
+ },
70
+ "end": {
71
+ "year": 0,
72
+ "month": 0,
73
+ "day": 0
74
+ }
75
+ },
76
+ {
77
+ "companyId": 166939,
78
+ "companyName": "William Blair & Company",
79
+ "companyUsername": "william-blair-company",
80
+ "companyURL": "https://www.linkedin.com/company/william-blair-company/",
81
+ "companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQE63WLd1DEgkw/company-logo_400_400/company-logo_400_400/0/1669143293900/william_blair__company_logo?e=1733356800&v=beta&t=lpEb9QzV4aUl3rBk_cyFI2qbyc3fVHQohUoO7kju8PM",
82
+ "companyIndustry": "Financial Services",
83
+ "companyStaffCountRange": "1001 - 5000",
84
+ "title": "Investment Banking",
85
+ "multiLocaleTitle": {
86
+ "en_US": "Investment Banking"
87
+ },
88
+ "multiLocaleCompanyName": {
89
+ "en_US": "William Blair & Company"
90
+ },
91
+ "location": "Chicago",
92
+ "description": "",
93
+ "employmentType": "",
94
+ "start": {
95
+ "year": 2015,
96
+ "month": 6,
97
+ "day": 0
98
+ },
99
+ "end": {
100
+ "year": 2017,
101
+ "month": 5,
102
+ "day": 0
103
+ }
104
+ },
105
+ {
106
+ "companyId": 1307,
107
+ "companyName": "Fidelity Investments",
108
+ "companyUsername": "fidelity-investments",
109
+ "companyURL": "https://www.linkedin.com/company/fidelity-investments/",
110
+ "companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
111
+ "companyIndustry": "Financial Services",
112
+ "companyStaffCountRange": "10001 - 0",
113
+ "title": "FFAS Corporate Finance",
114
+ "multiLocaleTitle": {
115
+ "en_US": "FFAS Corporate Finance"
116
+ },
117
+ "multiLocaleCompanyName": {
118
+ "en_US": "Fidelity Investments"
119
+ },
120
+ "location": "Smithfield, RI",
121
+ "description": "",
122
+ "employmentType": "",
123
+ "start": {
124
+ "year": 2014,
125
+ "month": 6,
126
+ "day": 0
127
+ },
128
+ "end": {
129
+ "year": 2015,
130
+ "month": 5,
131
+ "day": 0
132
+ }
133
+ },
134
+ {
135
+ "companyId": 1307,
136
+ "companyName": "Fidelity Investments",
137
+ "companyUsername": "fidelity-investments",
138
+ "companyURL": "https://www.linkedin.com/company/fidelity-investments/",
139
+ "companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
140
+ "companyIndustry": "Financial Services",
141
+ "companyStaffCountRange": "10001 - 0",
142
+ "title": "PI Corporate Finance",
143
+ "multiLocaleTitle": {
144
+ "en_US": "PI Corporate Finance"
145
+ },
146
+ "multiLocaleCompanyName": {
147
+ "en_US": "Fidelity Investments"
148
+ },
149
+ "location": "Smithfield, RI",
150
+ "description": "",
151
+ "employmentType": "",
152
+ "start": {
153
+ "year": 2013,
154
+ "month": 6,
155
+ "day": 0
156
+ },
157
+ "end": {
158
+ "year": 2014,
159
+ "month": 5,
160
+ "day": 0
161
+ }
162
+ }
163
+ ],
164
+ "fullPositions": [
165
+ {
166
+ "companyId": 57752,
167
+ "companyName": "Accel-KKR",
168
+ "companyUsername": "accel-kkr",
169
+ "companyURL": "https://www.linkedin.com/company/accel-kkr/",
170
+ "companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHJ-Smp3x90Yg/company-logo_400_400/company-logo_400_400/0/1630565829245/accel_kkr_logo?e=1733356800&v=beta&t=MuqS5XhM4c0BNCvk0cCsWIE5YzOWMu7HaFpld467P0w",
171
+ "companyIndustry": "Venture Capital & Private Equity",
172
+ "companyStaffCountRange": "51 - 200",
173
+ "title": "Investment Professional",
174
+ "multiLocaleTitle": {
175
+ "en_US": "Investment Professional"
176
+ },
177
+ "multiLocaleCompanyName": {
178
+ "en_US": "Accel-KKR"
179
+ },
180
+ "location": "Menlo Park, California",
181
+ "description": "Founded in 2000, Accel-KKR is a leading technology-focused private equity firm dedicated exclusively to investing in software and technology-enabled services companies. \n\nOur typical transactions include: \n\u2022 Acquisitions and recapitalizations of founder-owned or closely-held private companies \n\u2022 Buyouts of divisions, subsidiaries and business units from public companies \n\u2022 Take-private transactions of small public companies\n\u2022 Structured minority equity and debt investments\n\nwww.accel-kkr.com",
182
+ "employmentType": "",
183
+ "start": {
184
+ "year": 2017,
185
+ "month": 5,
186
+ "day": 0
187
+ },
188
+ "end": {
189
+ "year": 0,
190
+ "month": 0,
191
+ "day": 0
192
+ }
193
+ },
194
+ {
195
+ "companyId": 166939,
196
+ "companyName": "William Blair & Company",
197
+ "companyUsername": "william-blair-company",
198
+ "companyURL": "https://www.linkedin.com/company/william-blair-company/",
199
+ "companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQE63WLd1DEgkw/company-logo_400_400/company-logo_400_400/0/1669143293900/william_blair__company_logo?e=1733356800&v=beta&t=lpEb9QzV4aUl3rBk_cyFI2qbyc3fVHQohUoO7kju8PM",
200
+ "companyIndustry": "Financial Services",
201
+ "companyStaffCountRange": "1001 - 5000",
202
+ "title": "Investment Banking",
203
+ "multiLocaleTitle": {
204
+ "en_US": "Investment Banking"
205
+ },
206
+ "multiLocaleCompanyName": {
207
+ "en_US": "William Blair & Company"
208
+ },
209
+ "location": "Chicago",
210
+ "description": "",
211
+ "employmentType": "",
212
+ "start": {
213
+ "year": 2015,
214
+ "month": 6,
215
+ "day": 0
216
+ },
217
+ "end": {
218
+ "year": 2017,
219
+ "month": 5,
220
+ "day": 0
221
+ }
222
+ },
223
+ {
224
+ "companyId": 1307,
225
+ "companyName": "Fidelity Investments",
226
+ "companyUsername": "fidelity-investments",
227
+ "companyURL": "https://www.linkedin.com/company/fidelity-investments/",
228
+ "companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
229
+ "companyIndustry": "Financial Services",
230
+ "companyStaffCountRange": "10001 - 0",
231
+ "title": "FFAS Corporate Finance",
232
+ "multiLocaleTitle": {
233
+ "en_US": "FFAS Corporate Finance"
234
+ },
235
+ "multiLocaleCompanyName": {
236
+ "en_US": "Fidelity Investments"
237
+ },
238
+ "location": "Smithfield, RI",
239
+ "description": "",
240
+ "employmentType": "",
241
+ "start": {
242
+ "year": 2014,
243
+ "month": 6,
244
+ "day": 0
245
+ },
246
+ "end": {
247
+ "year": 2015,
248
+ "month": 5,
249
+ "day": 0
250
+ }
251
+ },
252
+ {
253
+ "companyId": 1307,
254
+ "companyName": "Fidelity Investments",
255
+ "companyUsername": "fidelity-investments",
256
+ "companyURL": "https://www.linkedin.com/company/fidelity-investments/",
257
+ "companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQF6iwa5VSk-PQ/company-logo_400_400/company-logo_400_400/0/1724950881853/fidelity_investments_logo?e=1733356800&v=beta&t=aUkLhTRKySJtcU9mrvOCDYVYFVH2jStpOVaepBbdO80",
258
+ "companyIndustry": "Financial Services",
259
+ "companyStaffCountRange": "10001 - 0",
260
+ "title": "PI Corporate Finance",
261
+ "multiLocaleTitle": {
262
+ "en_US": "PI Corporate Finance"
263
+ },
264
+ "multiLocaleCompanyName": {
265
+ "en_US": "Fidelity Investments"
266
+ },
267
+ "location": "Smithfield, RI",
268
+ "description": "",
269
+ "employmentType": "",
270
+ "start": {
271
+ "year": 2013,
272
+ "month": 6,
273
+ "day": 0
274
+ },
275
+ "end": {
276
+ "year": 2014,
277
+ "month": 5,
278
+ "day": 0
279
+ }
280
+ }
281
+ ],
282
+ "skills": [
283
+ {
284
+ "name": "Microsoft Office",
285
+ "passedSkillAssessment": false,
286
+ "endorsementsCount": 5
287
+ },
288
+ {
289
+ "name": "Microsoft Excel",
290
+ "passedSkillAssessment": false,
291
+ "endorsementsCount": 2
292
+ },
293
+ {
294
+ "name": "Microsoft Word",
295
+ "passedSkillAssessment": false,
296
+ "endorsementsCount": 7
297
+ },
298
+ {
299
+ "name": "Customer Service",
300
+ "passedSkillAssessment": false
301
+ },
302
+ {
303
+ "name": "PowerPoint",
304
+ "passedSkillAssessment": false,
305
+ "endorsementsCount": 4
306
+ },
307
+ {
308
+ "name": "English",
309
+ "passedSkillAssessment": false
310
+ },
311
+ {
312
+ "name": "Windows",
313
+ "passedSkillAssessment": false
314
+ },
315
+ {
316
+ "name": "Research",
317
+ "passedSkillAssessment": false,
318
+ "endorsementsCount": 2
319
+ },
320
+ {
321
+ "name": "Outlook",
322
+ "passedSkillAssessment": false
323
+ },
324
+ {
325
+ "name": "Teaching",
326
+ "passedSkillAssessment": false
327
+ },
328
+ {
329
+ "name": "Photoshop",
330
+ "passedSkillAssessment": false
331
+ },
332
+ {
333
+ "name": "Public Speaking",
334
+ "passedSkillAssessment": false,
335
+ "endorsementsCount": 2
336
+ },
337
+ {
338
+ "name": "HTML",
339
+ "passedSkillAssessment": false
340
+ },
341
+ {
342
+ "name": "Strategic Planning",
343
+ "passedSkillAssessment": false
344
+ },
345
+ {
346
+ "name": "Budgets",
347
+ "passedSkillAssessment": false
348
+ }
349
+ ],
350
+ "givenRecommendation": null,
351
+ "givenRecommendationCount": 0,
352
+ "receivedRecommendation": null,
353
+ "receivedRecommendationCount": 0,
354
+ "courses": null,
355
+ "certifications": null,
356
+ "honors": null,
357
+ "projects": {
358
+ "total": 0,
359
+ "items": null
360
+ },
361
+ "volunteering": [
362
+ {
363
+ "title": "Fundraiser Leader",
364
+ "start": {
365
+ "year": 2014,
366
+ "month": 7,
367
+ "day": 0
368
+ },
369
+ "end": {
370
+ "year": 2014,
371
+ "month": 7,
372
+ "day": 0
373
+ },
374
+ "companyName": "AHA",
375
+ "CompanyId": "",
376
+ "companyUrl": "",
377
+ "companyLogo": ""
378
+ }
379
+ ],
380
+ "supportedLocales": [
381
+ {
382
+ "country": "US",
383
+ "language": "en"
384
+ }
385
+ ],
386
+ "multiLocaleFirstName": {
387
+ "en": "Eric"
388
+ },
389
+ "multiLocaleLastName": {
390
+ "en": "Armagost"
391
+ },
392
+ "multiLocaleHeadline": {
393
+ "en": "Investor at Accel-KKR"
394
+ }
395
+ }
src/vsp/app/classifiers/__init__.py ADDED
File without changes
src/vsp/app/{education_classifier.py → classifiers/education_classifier.py} RENAMED
File without changes
src/vsp/app/classifiers/work_experience/__init__.py ADDED
File without changes
src/vsp/app/{work_experience_classifier.py → classifiers/work_experience/general_work_experience_classifier.py} RENAMED
File without changes
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ investment_banking_group_classifier.py
3
+
4
+ This module provides functionality for classifying investment banking groups
5
+ based on work experiences from LinkedIn profiles. It uses a language model to
6
+ determine the specific investment banking group a position belongs to.
7
+
8
+ Classes:
9
+ InvestmentBankingGroup: Enum representing different investment banking groups.
10
+ InvestmentBankingGroupClassification: Pydantic model for classification results.
11
+ InvestmentBankingGroupClassifier: Main class for classifying investment banking groups.
12
+
13
+ Usage:
14
+ classifier = InvestmentBankingGroupClassifier()
15
+ classification = await classifier.classify_investment_banking_group(linkedin_profile, work_experience)
16
+ """
17
+
18
+ from enum import Enum
19
+ from typing import Any, Final
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ from vsp.app import bindings
24
+ from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
25
+ from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
26
+ from vsp.app.prompts.prompt_loader import PromptLoader
27
+ from vsp.llm.llm_service import LLMService
28
+
29
+
30
+ class InvestmentBankingGroup(str, Enum):
31
+ """Enumeration of different investment banking groups."""
32
+
33
+ GENERALIST = "Generalist"
34
+ M_AND_A = "M&A"
35
+ LEVERAGED_FINANCE = "Leveraged Finance"
36
+ FINANCIAL_SPONSORS = "Financial Sponsors"
37
+ EQUITY_CAPITAL_MARKETS = "Equity Capital Markets"
38
+ DEBT_CAPITAL_MARKETS = "Debt Capital Markets"
39
+ RESTRUCTURING = "Restructuring"
40
+ PRIVATE_FUNDS = "Private Funds"
41
+ STRUCTURED_FINANCE = "Structured Finance"
42
+ HEALTHCARE = "Healthcare"
43
+ INDUSTRIALS = "Industrials"
44
+ BUSINESS_SERVICES = "Business Services"
45
+ CONSUMER_RETAIL = "Consumer & Retail"
46
+ ENERGY_NATURAL_RESOURCES = "Energy / Natural Resources / Cleantech / Utilities"
47
+ REAL_ESTATE_LODGINGS = "Real Estate, Gaming & Lodging"
48
+ TECHNOLOGY_MEDIA_TELECOM = "Technology / Software / TMT"
49
+ MEDIA_ENTERTAINMENT = "Media & Entertainment"
50
+ FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
51
+ INFRASTRUCTURE = "Infrastructure / Transportation"
52
+ OTHER = "Other"
53
+
54
+
55
+ _INVESTMENT_BANKING_GROUP_MAPPINGS: Final[dict[str, InvestmentBankingGroup]] = {
56
+ group.name: group for group in InvestmentBankingGroup
57
+ }
58
+
59
+
60
+ class InvestmentBankingGroupClassification(BaseModel):
61
+ """
62
+ Pydantic model representing the classification result for an investment banking group.
63
+
64
+ Attributes:
65
+ investment_banking_group (InvestmentBankingGroup): The classified investment banking group.
66
+ confidence (float): Confidence level of the classification, between 0.0 and 1.0.
67
+ reasoning (str): Explanation for the classification decision.
68
+ """
69
+
70
+ investment_banking_group: InvestmentBankingGroup = Field(description="The investment banking group")
71
+ confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
72
+ reasoning: str = Field(description="Explanation for the classification")
73
+
74
+
75
+ class InvestmentBankingGroupClassifier:
76
+ """
77
+ A class for classifying investment banking groups based on work experiences from LinkedIn profiles.
78
+
79
+ This classifier uses a language model to determine the specific investment banking group
80
+ a position belongs to based on the information provided in a LinkedIn profile and specific work experience.
81
+
82
+ Attributes:
83
+ _llm_service (LLMService): The language model service used for classification.
84
+ _prompt_template (Any): The template for generating prompts for the language model.
85
+ _prompt_loader (PromptLoader): The loader for prompt templates.
86
+ """
87
+
88
+ def __init__(
89
+ self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
90
+ ) -> None:
91
+ """
92
+ Initialize the InvestmentBankingGroupClassifier.
93
+
94
+ Args:
95
+ llm_service (LLMService, optional): The language model service to use.
96
+ Defaults to the OpenAI service defined in bindings.
97
+ prompt_loader (PromptLoader, optional): The prompt loader to use.
98
+ Defaults to the prompt loader defined in bindings.
99
+ """
100
+ self._llm_service = llm_service
101
+ self._prompt_template = prompt_loader.load_template(
102
+ "work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier"
103
+ )
104
+ self._prompt_loader = prompt_loader
105
+
106
+ @staticmethod
107
+ def _parse_output(output: str) -> InvestmentBankingGroupClassification:
108
+ """
109
+ Parse the output from the language model into an InvestmentBankingGroupClassification object.
110
+
111
+ Args:
112
+ output (str): The raw output string from the language model.
113
+
114
+ Returns:
115
+ InvestmentBankingGroupClassification: A structured representation of the classification result.
116
+
117
+ Raises:
118
+ ValueError: If the output contains an unknown investment banking group or invalid confidence value.
119
+ """
120
+ lines = output.strip().split("\n")
121
+ parsed: dict[str, Any] = {}
122
+ for line in lines:
123
+ key, value = line.split(":", 1)
124
+ parsed[key.strip()] = value.strip()
125
+
126
+ investment_banking_group_str = parsed["investment_banking_group"].upper()
127
+
128
+ try:
129
+ investment_banking_group = _INVESTMENT_BANKING_GROUP_MAPPINGS[investment_banking_group_str]
130
+ except KeyError as e:
131
+ raise ValueError(f"Unknown investment banking group: {str(e)}")
132
+
133
+ try:
134
+ confidence = float(parsed["confidence"])
135
+ except ValueError:
136
+ raise ValueError(f"Invalid confidence value: {parsed['confidence']}")
137
+
138
+ return InvestmentBankingGroupClassification(
139
+ investment_banking_group=investment_banking_group,
140
+ confidence=confidence,
141
+ reasoning=parsed["reasoning"],
142
+ )
143
+
144
+ async def classify_investment_banking_group(
145
+ self, linkedin_profile: LinkedinProfile, work_experience: Position
146
+ ) -> InvestmentBankingGroupClassification:
147
+ """
148
+ Classify a single work experience item from a LinkedIn profile into an investment banking group.
149
+
150
+ This method prepares the input for the language model, sends the query,
151
+ and processes the result to classify the work experience item into an investment banking group.
152
+
153
+ Args:
154
+ linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
155
+ work_experience (Position): The specific work experience item to classify.
156
+
157
+ Returns:
158
+ InvestmentBankingGroupClassification: The classification result for the work experience item.
159
+
160
+ Raises:
161
+ ValueError: If the prompt evaluation fails to produce a result.
162
+ """
163
+ prompt = self._prompt_loader.create_prompt(
164
+ self._prompt_template,
165
+ llm_service=self._llm_service,
166
+ output_formatter=self._parse_output,
167
+ resume=format_profile_as_resume(linkedin_profile),
168
+ work_experience=format_position(work_experience),
169
+ )
170
+ return await prompt.evaluate() # type: ignore
src/vsp/app/main.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ main.py
3
+
4
+ This module provides functionality for processing and classifying LinkedIn profiles.
5
+ It uses various classifiers to analyze education and work experience data,
6
+ including specific classifications for investment banking roles.
7
+
8
+ The main components are:
9
+ 1. Data models for classified education and work experience.
10
+ 2. A function to process a LinkedIn profile and generate classification results.
11
+ 3. An example usage demonstrating how to use the module with a JSON file input.
12
+
13
+ This module leverages asyncio for concurrent processing of profile data.
14
+
15
+ Classes:
16
+ ClassifiedEducation: Represents a classified education item.
17
+ ClassifiedWorkExperience: Represents a classified work experience item.
18
+ LinkedinProfileClassificationResults: Holds the classification results for a LinkedIn profile.
19
+
20
+ Functions:
21
+ process_linkedin_profile: Asynchronously processes a LinkedIn profile and returns classification results.
22
+ main: An example async function demonstrating how to use the module.
23
+
24
+ Usage:
25
+ This script can be run directly to process a sample LinkedIn profile:
26
+ $ python main.py
27
+
28
+ Or the `process_linkedin_profile` function can be imported and used in other modules.
29
+ """
30
+
31
+ import asyncio
32
+ import json
33
+ from typing import Sequence
34
+
35
+ from pydantic import BaseModel, Field
36
+
37
+ from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
38
+ from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
39
+ SecondaryJobType,
40
+ WorkExperienceClassification,
41
+ WorkExperienceClassifier,
42
+ )
43
+ from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
44
+ InvestmentBankingGroupClassification,
45
+ InvestmentBankingGroupClassifier,
46
+ )
47
+ from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
48
+
49
+
50
+ class ClassifiedEducation(BaseModel):
51
+ """
52
+ Represents a classified education item from a LinkedIn profile.
53
+
54
+ Attributes:
55
+ education (Education): The original education data from the LinkedIn profile.
56
+ classification (EducationClassification): The classification results for the education item.
57
+ """
58
+
59
+ education: Education
60
+ classification: EducationClassification
61
+
62
+
63
+ class ClassifiedWorkExperience(BaseModel):
64
+ """
65
+ Represents a classified work experience item from a LinkedIn profile.
66
+
67
+ Attributes:
68
+ position (Position): The original position data from the LinkedIn profile.
69
+ work_experience_classification (WorkExperienceClassification): The general work experience classification.
70
+ investment_banking_classification (InvestmentBankingGroupClassification | None):
71
+ The investment banking group classification, if applicable.
72
+ """
73
+
74
+ position: Position
75
+ work_experience_classification: WorkExperienceClassification
76
+ investment_banking_classification: InvestmentBankingGroupClassification | None = None
77
+
78
+
79
+ class LinkedinProfileClassificationResults(BaseModel):
80
+ """
81
+ Holds the classification results for a LinkedIn profile.
82
+
83
+ Attributes:
84
+ classified_educations (Sequence[ClassifiedEducation]): List of classified education items.
85
+ classified_work_experiences (Sequence[ClassifiedWorkExperience]): List of classified work experience items.
86
+ """
87
+
88
+ classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
89
+ classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
90
+
91
+
92
+ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
93
+ """
94
+ Asynchronously process a LinkedIn profile and generate classification results.
95
+
96
+ This function performs the following steps:
97
+ 1. Classifies all education items in the profile.
98
+ 2. Classifies all work experience items in the profile.
99
+ 3. For work experiences classified as investment banking, performs an additional
100
+ investment banking group classification.
101
+
102
+ Args:
103
+ profile (LinkedinProfile): The LinkedIn profile to process.
104
+
105
+ Returns:
106
+ LinkedinProfileClassificationResults: The classification results for the profile.
107
+ """
108
+ education_classifier = EducationClassifier()
109
+ work_experience_classifier = WorkExperienceClassifier()
110
+ investment_banking_classifier = InvestmentBankingGroupClassifier()
111
+
112
+ # Classify educations
113
+ education_tasks = [education_classifier.classify_education(profile, education) for education in profile.educations]
114
+ education_classifications = await asyncio.gather(*education_tasks)
115
+
116
+ # Classify work experiences
117
+ work_experience_tasks = [
118
+ work_experience_classifier.classify_work_experience(profile, position) for position in profile.positions
119
+ ]
120
+ work_experience_classifications = await asyncio.gather(*work_experience_tasks)
121
+
122
+ # Classify investment banking groups for relevant positions
123
+ classified_work_experiences = []
124
+ for position, work_classification in zip(profile.positions, work_experience_classifications):
125
+ classified_work_experience = ClassifiedWorkExperience(
126
+ position=position, work_experience_classification=work_classification
127
+ )
128
+
129
+ if work_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING:
130
+ ib_classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)
131
+ classified_work_experience.investment_banking_classification = ib_classification
132
+
133
+ classified_work_experiences.append(classified_work_experience)
134
+
135
+ # Prepare the results using Pydantic models
136
+ return LinkedinProfileClassificationResults(
137
+ classified_educations=[
138
+ ClassifiedEducation(education=education, classification=classification)
139
+ for education, classification in zip(profile.educations, education_classifications)
140
+ ],
141
+ classified_work_experiences=classified_work_experiences,
142
+ )
143
+
144
+
145
+ async def main() -> None:
146
+ """
147
+ Example usage of the LinkedIn profile processing functionality.
148
+
149
+ This function demonstrates how to:
150
+ 1. Load a LinkedIn profile from a JSON file.
151
+ 2. Process the profile using the `process_linkedin_profile` function.
152
+ 3. Print the classification results.
153
+
154
+ The function is asynchronous and should be run in an event loop.
155
+ """
156
+ with open("src/notebooks/data/eric_armagost.json") as f:
157
+ profile = LinkedinProfile.profile_from_json(json.load(f))
158
+ results = await process_linkedin_profile(profile)
159
+ print(results.model_dump_json(indent=2))
160
+
161
+
162
+ if __name__ == "__main__":
163
+ asyncio.run(main())
src/vsp/app/model/linkedin/linkedin_models.py CHANGED
@@ -129,17 +129,17 @@ class LinkedinProfile(BaseSchema):
129
  courses: List[Course] | None = []
130
  certifications: List[Certification] | None = []
131
 
132
-
133
- def profile_from_json(json: dict[str, Any]) -> LinkedinProfile:
134
- """
135
- Create a Profile instance from the given JSON data.
136
-
137
- :param json: The JSON data to create a Profile instance from.
138
- :return: A Profile instance created from the given JSON data.
139
- """
140
- profile = LinkedinProfile.model_validate(json)
141
- if (
142
- profile.full_positions is not None and profile.positions is not None
143
- ): # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
144
- profile.positions = profile.full_positions
145
- return profile
 
129
  courses: List[Course] | None = []
130
  certifications: List[Certification] | None = []
131
 
132
+ @staticmethod
133
+ def profile_from_json(json: dict[str, Any]) -> "LinkedinProfile":
134
+ """
135
+ Create a Profile instance from the given JSON data.
136
+
137
+ :param json: The JSON data to create a Profile instance from.
138
+ :return: A Profile instance created from the given JSON data.
139
+ """
140
+ profile = LinkedinProfile.model_validate(json)
141
+ if (
142
+ profile.full_positions is not None and profile.positions is not None
143
+ ): # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
144
+ profile.positions = profile.full_positions
145
+ return profile
src/vsp/app/prompts/prompt_loader.py CHANGED
@@ -104,14 +104,15 @@ class PromptLoader:
104
  FileNotFoundError: If the specified prompt family directory doesn't exist.
105
  """
106
  parts = full_name.split("/")
107
- if len(parts) != 2:
108
- raise ValueError(f"Invalid prompt name format. Expected 'family/name', got '{full_name}'")
109
 
110
- family, name = parts
111
- prompt_dir = self.base_path / family
 
112
 
113
  if not prompt_dir.is_dir():
114
- raise FileNotFoundError(f"Prompt family directory not found: {prompt_dir}")
115
 
116
  template = PromptTemplate(name)
117
  for file in prompt_dir.glob(f"{name}_*.txt"):
 
104
  FileNotFoundError: If the specified prompt family directory doesn't exist.
105
  """
106
  parts = full_name.split("/")
107
+ if len(parts) < 2:
108
+ raise ValueError(f"Invalid prompt name format. Expected at least 'family/name', got '{full_name}'")
109
 
110
+ name = parts[-1]
111
+ nested_dirs = parts[:-1]
112
+ prompt_dir = self.base_path.joinpath(*nested_dirs)
113
 
114
  if not prompt_dir.is_dir():
115
+ raise FileNotFoundError(f"Prompt directory not found: {prompt_dir}")
116
 
117
  template = PromptTemplate(name)
118
  for file in prompt_dir.glob(f"{name}_*.txt"):
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_human.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Please classify the following investment banking work experience item based on the job candidate's full resume and the specific investment banking work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investment banking group the candidate worked in.
2
+
3
+ Full Resume:
4
+ {resume}
5
+
6
+ Specific Investment Banking Work Experience Item:
7
+ {work_experience}
8
+
9
+ Provide your best guess on the investment banking group, your confidence level (0.0 to 1.0), and your reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
src/vsp/app/prompts/work_experience_classifier/investment_banking_group/1 - investment_banking_group_classifier_system.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert investment banking recruiter. You have been asked to analyze a specific investment banking work experience item from a job candidate's LinkedIn profile. Your task is to accurately classify the investment banking group the candidate worked in.
2
+
3
+ You can use your understanding of the investment banking industry as an expert recruiter in investment banking to make the best guess. Use the provided information carefully to make accurate classifications.
4
+
5
+ Investment banking groups:
6
+ 1. GENERALIST: A group that covers a wide range of industries and sectors.
7
+ 2. M_AND_A: A group focused on mergers and acquisitions.
8
+ 3. LEVERAGED_FINANCE: A group specializing in leveraged finance transactions.
9
+ 4. FINANCIAL_SPONSORS: A group that works with private equity firms and other financial sponsors.
10
+ 5. EQUITY_CAPITAL_MARKETS: A group focused on equity capital markets transactions.
11
+ 6. DEBT_CAPITAL_MARKETS: A group specializing in debt capital markets transactions.
12
+ 7. RESTRUCTURING: A group that handles restructuring and distressed situations.
13
+ 8. PRIVATE_FUNDS: A group that works with private equity funds and other private investment vehicles.
14
+ 9. STRUCTURED_FINANCE: A group specializing in structured finance products.
15
+ 10. HEALTHCARE: A group focused on healthcare industry transactions.
16
+ 11. INDUSTRIALS: A group that covers industrial sector transactions.
17
+ 12. BUSINESS_SERVICES: A group focused on business services industry transactions.
18
+ 13. CONSUMER_RETAIL: A group specializing in consumer and retail sector transactions.
19
+ 14. ENERGY_NATURAL_RESOURCES: A group focused on energy, natural resources, cleantech, and utilities.
20
+ 15. REAL_ESTATE_LODGINGS: A group specializing in real estate, gaming / casinos, and lodging transactions.
21
+ 16. TECHNOLOGY_MEDIA_TELECOM: A group focused on technology, media, and telecommunications transactions.
22
+ 17. MEDIA_ENTERTAINMENT: A group specializing in media and entertainment industry transactions.
23
+ 18. FINANCIAL_INSTITUTIONS: A group focused on financial institutions and banking transactions.
24
+ 19. INFRASTRUCTURE: A group specializing in infrastructure and transportation transactions.
25
+ 20. OTHER: Any group that doesn't fit the above categories.
26
+
27
+ Provide your response in the following format exactly:
28
+
29
+ investment_banking_group: [One of the twenty investment banking groups listed above]
30
+ confidence: [0.0 to 1.0]
31
+ reasoning: [Your explanation here]
32
+
33
+ Ensure each part of your response is on a separate line, exactly as shown above. There should be only three lines.
34
+ Your confidence level should reflect how certain you are about your classification based on the information provided.
35
+
36
+ In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
tests/app/{test_education_classifier.py → classifiers/test_education_classifier.py} RENAMED
@@ -2,7 +2,7 @@ from unittest.mock import AsyncMock, MagicMock
2
 
3
  import pytest
4
 
5
- from vsp.app.education_classifier import EducationClassification, EducationClassifier, SchoolType
6
  from vsp.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile
7
 
8
 
 
2
 
3
  import pytest
4
 
5
+ from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier, SchoolType
6
  from vsp.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile
7
 
8
 
tests/app/classifiers/work_experience/test_investment_banking_group_classifier.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import AsyncMock, MagicMock
2
+
3
+ import pytest
4
+
5
+ from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
6
+ InvestmentBankingGroup,
7
+ InvestmentBankingGroupClassification,
8
+ InvestmentBankingGroupClassifier,
9
+ )
10
+ from vsp.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
11
+
12
+
13
+ @pytest.fixture
14
+ def mock_llm_service():
15
+ return AsyncMock()
16
+
17
+
18
+ @pytest.fixture
19
+ def mock_prompt_loader():
20
+ loader = MagicMock()
21
+ loader.load_template.return_value = MagicMock()
22
+ loader.create_prompt.return_value = AsyncMock()
23
+ return loader
24
+
25
+
26
+ @pytest.fixture
27
+ def investment_banking_group_classifier(mock_llm_service, mock_prompt_loader):
28
+ return InvestmentBankingGroupClassifier(llm_service=mock_llm_service, prompt_loader=mock_prompt_loader)
29
+
30
+
31
+ @pytest.fixture
32
+ def sample_linkedin_profile():
33
+ return LinkedinProfile(
34
+ first_name="John",
35
+ last_name="Doe",
36
+ position=[
37
+ Position(
38
+ title="Investment Banking Analyst",
39
+ company_name="Goldman Sachs",
40
+ start=DateComponent(year=2020, month=1),
41
+ end=DateComponent(year=2022, month=12),
42
+ )
43
+ ],
44
+ )
45
+
46
+
47
+ @pytest.mark.asyncio
48
+ async def test_classify_investment_banking_group(
49
+ investment_banking_group_classifier, sample_linkedin_profile, mock_prompt_loader
50
+ ):
51
+ mock_prompt = mock_prompt_loader.create_prompt.return_value
52
+ mock_prompt.evaluate.return_value = InvestmentBankingGroupClassification(
53
+ investment_banking_group=InvestmentBankingGroup.M_AND_A,
54
+ confidence=0.95,
55
+ reasoning="This is an M&A role based on the job description and company.",
56
+ )
57
+
58
+ result = await investment_banking_group_classifier.classify_investment_banking_group(
59
+ sample_linkedin_profile, sample_linkedin_profile.positions[0]
60
+ )
61
+
62
+ assert isinstance(result, InvestmentBankingGroupClassification)
63
+ assert result.investment_banking_group == InvestmentBankingGroup.M_AND_A
64
+ assert result.confidence == 0.95
65
+ assert "M&A role" in result.reasoning
66
+
67
+
68
+ @pytest.mark.parametrize(
69
+ "group,expected_group,confidence,reasoning",
70
+ [
71
+ ("GENERALIST", InvestmentBankingGroup.GENERALIST, 0.9, "Test reasoning"),
72
+ ("M_AND_A", InvestmentBankingGroup.M_AND_A, 0.8, "Test reasoning"),
73
+ ("LEVERAGED_FINANCE", InvestmentBankingGroup.LEVERAGED_FINANCE, 0.7, "Test reasoning"),
74
+ ("EQUITY_CAPITAL_MARKETS", InvestmentBankingGroup.EQUITY_CAPITAL_MARKETS, 0.95, "Test reasoning"),
75
+ ("OTHER", InvestmentBankingGroup.OTHER, 0.6, "Test reasoning"),
76
+ ],
77
+ )
78
+ def test_parse_output(group, expected_group, confidence, reasoning):
79
+ output = f"investment_banking_group: {group}\nconfidence: {confidence}\nreasoning: {reasoning}"
80
+ parsed = InvestmentBankingGroupClassifier._parse_output(output)
81
+ assert parsed.investment_banking_group == expected_group
82
+ assert parsed.confidence == confidence
83
+ assert parsed.reasoning == reasoning
84
+
85
+
86
+ def test_parse_output_invalid_group():
87
+ with pytest.raises(ValueError, match="Unknown investment banking group"):
88
+ InvestmentBankingGroupClassifier._parse_output(
89
+ "investment_banking_group: INVALID\nconfidence: 0.9\nreasoning: Test reasoning"
90
+ )
91
+
92
+
93
+ def test_parse_output_invalid_confidence():
94
+ with pytest.raises(ValueError, match="Invalid confidence value"):
95
+ InvestmentBankingGroupClassifier._parse_output(
96
+ "investment_banking_group: M_AND_A\nconfidence: invalid\nreasoning: Test reasoning"
97
+ )
98
+
99
+
100
+ @pytest.mark.asyncio
101
+ async def test_classify_investment_banking_group_error_handling(
102
+ investment_banking_group_classifier, sample_linkedin_profile, mock_prompt_loader
103
+ ):
104
+ mock_prompt = mock_prompt_loader.create_prompt.return_value
105
+ mock_prompt.evaluate.side_effect = ValueError("Test error")
106
+
107
+ with pytest.raises(ValueError, match="Test error"):
108
+ await investment_banking_group_classifier.classify_investment_banking_group(
109
+ sample_linkedin_profile, sample_linkedin_profile.positions[0]
110
+ )
111
+
112
+
113
+ @pytest.mark.asyncio
114
+ async def test_classify_investment_banking_group_edge_cases(investment_banking_group_classifier, mock_prompt_loader):
115
+ mock_prompt = mock_prompt_loader.create_prompt.return_value
116
+ mock_prompt.evaluate.return_value = InvestmentBankingGroupClassification(
117
+ investment_banking_group=InvestmentBankingGroup.OTHER,
118
+ confidence=0.5,
119
+ reasoning="Unable to determine specific group",
120
+ )
121
+
122
+ # Test with minimal profile and position data
123
+ minimal_profile = LinkedinProfile(first_name="Jane", last_name="Smith")
124
+ minimal_position = Position(title="Intern", company_name="Finance Corp")
125
+
126
+ result = await investment_banking_group_classifier.classify_investment_banking_group(
127
+ minimal_profile, minimal_position
128
+ )
129
+
130
+ assert isinstance(result, InvestmentBankingGroupClassification)
131
+ assert result.investment_banking_group == InvestmentBankingGroup.OTHER
132
+ assert result.confidence == 0.5
133
+ assert "Unable to determine" in result.reasoning
tests/app/{test_work_experience_classifier.py → classifiers/work_experience/test_work_experience_classifier.py} RENAMED
@@ -2,13 +2,13 @@ from unittest.mock import AsyncMock, MagicMock
2
 
3
  import pytest
4
 
5
- from vsp.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
6
- from vsp.app.work_experience_classifier import (
7
  PrimaryJobType,
8
  SecondaryJobType,
9
  WorkExperienceClassification,
10
  WorkExperienceClassifier,
11
  )
 
12
 
13
 
14
  @pytest.fixture
 
2
 
3
  import pytest
4
 
5
+ from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
 
6
  PrimaryJobType,
7
  SecondaryJobType,
8
  WorkExperienceClassification,
9
  WorkExperienceClassifier,
10
  )
11
+ from vsp.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
12
 
13
 
14
  @pytest.fixture
tests/app/prompts/test_prompt_loader.py CHANGED
@@ -1,20 +1,15 @@
1
  from pathlib import Path
 
2
 
3
  import pytest
4
 
5
  from vsp.app.prompts.prompt_loader import PromptLoader
6
- from vsp.llm.openai.openai import AsyncOpenAIService
7
- from vsp.llm.openai.openai_model import OpenAIModel
8
  from vsp.llm.prompt import Prompt
9
 
10
 
11
- def get_test_prompts_path():
12
- return Path(__file__).parent / "test_prompts"
13
-
14
-
15
  @pytest.fixture
16
  def prompt_loader():
17
- return PromptLoader(get_test_prompts_path())
18
 
19
 
20
  def test_load_template_success(prompt_loader):
@@ -45,7 +40,7 @@ def test_load_template_nonexistent(prompt_loader):
45
 
46
  def test_create_prompt(prompt_loader):
47
  template = prompt_loader.load_template("basic_test/1 - test")
48
- llm_service = AsyncOpenAIService(OpenAIModel.GPT_4_MINI)
49
  prompt = prompt_loader.create_prompt(
50
  template,
51
  llm_service=llm_service,
@@ -66,9 +61,49 @@ def test_create_prompt(prompt_loader):
66
 
67
  def test_create_prompt_with_system_prompt(prompt_loader):
68
  template = prompt_loader.load_template("basic_test/2 - test2")
69
- llm_service = AsyncOpenAIService(OpenAIModel.GPT_4_MINI)
70
  prompt = prompt_loader.create_prompt(template, llm_service=llm_service)
71
 
72
  assert isinstance(prompt, Prompt)
73
  assert prompt._system_prompt.get_prompt() == "This is a system prompt for test2."
74
  assert prompt._user_prompt.get_prompt() == "This is another user prompt."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pathlib import Path
2
+ from unittest.mock import AsyncMock
3
 
4
  import pytest
5
 
6
  from vsp.app.prompts.prompt_loader import PromptLoader
 
 
7
  from vsp.llm.prompt import Prompt
8
 
9
 
 
 
 
 
10
  @pytest.fixture
11
  def prompt_loader():
12
+ return PromptLoader(Path(__file__).parent / "test_prompts")
13
 
14
 
15
  def test_load_template_success(prompt_loader):
 
40
 
41
  def test_create_prompt(prompt_loader):
42
  template = prompt_loader.load_template("basic_test/1 - test")
43
+ llm_service = AsyncMock()
44
  prompt = prompt_loader.create_prompt(
45
  template,
46
  llm_service=llm_service,
 
61
 
62
  def test_create_prompt_with_system_prompt(prompt_loader):
63
  template = prompt_loader.load_template("basic_test/2 - test2")
64
+ llm_service = AsyncMock()
65
  prompt = prompt_loader.create_prompt(template, llm_service=llm_service)
66
 
67
  assert isinstance(prompt, Prompt)
68
  assert prompt._system_prompt.get_prompt() == "This is a system prompt for test2."
69
  assert prompt._user_prompt.get_prompt() == "This is another user prompt."
70
+
71
+
72
+ # New test for nested prompts
73
+ def test_load_nested_template(prompt_loader):
74
+ template = prompt_loader.load_template("basic_test/nested/1 - nested_test")
75
+ assert template.name == "1 - nested_test"
76
+ assert template.user_prompt.strip() == "This is a nested user prompt."
77
+ assert template.system_prompt is None
78
+ assert template.partial_assistant_prompt is None
79
+
80
+
81
+ def test_load_nested_template_nonexistent(prompt_loader):
82
+ with pytest.raises(FileNotFoundError):
83
+ prompt_loader.load_template("basic_test/nonexistent_nested/1 - test")
84
+
85
+
86
+ def test_load_nested_template_invalid_format(prompt_loader):
87
+ with pytest.raises(ValueError, match="Invalid prompt name format"):
88
+ prompt_loader.load_template("invalid_format")
89
+
90
+
91
+ def test_create_nested_prompt(prompt_loader):
92
+ template = prompt_loader.load_template("basic_test/nested/1 - nested_test")
93
+ llm_service = AsyncMock()
94
+ prompt = prompt_loader.create_prompt(
95
+ template,
96
+ llm_service=llm_service,
97
+ max_tokens=300,
98
+ temperature=0.5,
99
+ output_formatter=lambda x: {"nested_result": x},
100
+ variable="nested_test_value",
101
+ )
102
+
103
+ assert isinstance(prompt, Prompt)
104
+ assert prompt._llm_service == llm_service
105
+ assert prompt._max_tokens == 300
106
+ assert prompt._temperature == 0.5
107
+ assert callable(prompt._output_formatter)
108
+ assert prompt._user_prompt.get_prompt() == "This is a nested user prompt."
109
+ assert prompt._system_prompt is None
tests/app/prompts/test_prompts/basic_test/1 - test_user.txt DELETED
@@ -1 +0,0 @@
1
- This is a user prompt.
 
 
tests/app/prompts/test_prompts/basic_test/nested/1 - nested_test_human.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is a nested user prompt.