navkast committed on
Commit
518f864
·
unverified ·
1 Parent(s): ad2d836

Fix all references in main and workbooks (#11)

Browse files

* Fix all references in main and workbooks

* commit notebooks

src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,54 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1125\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1052\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m104\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1052\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1051\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m108\u001b[0m\n",
67
+ "{\n",
68
+ " \"investment_banking_group\": \"Private Funds\",\n",
69
+ " \"confidence\": 0.9,\n",
70
+ " \"reasoning\": \"The candidate's role as an Investment Professional at Accel-KKR, a private equity firm focused on technology investments, aligns closely with the PRIVATE_FUNDS group. Their work involves acquisitions, buyouts, and structured investments, which are typical activities in private equity. Additionally, the candidate's experience at Fidelity Investments in corporate finance suggests a strong background in financial transactions, further supporting this classification.\"\n",
71
+ "}\n",
72
+ "{\n",
73
+ " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
+ " \"confidence\": 0.8,\n",
75
+ " \"reasoning\": \"The candidate worked in the FFAS Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the focus of the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their education in Business Economics, supports this classification, although the lack of specific details in the job description leaves some uncertainty.\"\n",
76
+ "}\n",
77
+ "{\n",
78
+ " \"investment_banking_group\": \"Generalist\",\n",
79
+ " \"confidence\": 0.8,\n",
80
+ " \"reasoning\": \"The candidate worked in the Investment Banking division at William Blair & Company, a firm known for providing a wide range of financial advisory services, which suggests a generalist role. The absence of specific details in the work experience description indicates that the candidate may have been involved in various transactions across different sectors rather than specializing in a particular area. Additionally, their subsequent role at Accel-KKR, a private equity firm focused on technology, further supports the idea of a generalist background prior to specializing.\"\n",
81
+ "}\n",
82
+ "{\n",
83
+ " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
84
+ " \"confidence\": 0.8,\n",
85
+ " \"reasoning\": \"The candidate worked in the PI Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their background in investment banking and private equity, supports this classification, although the lack of specific details in the work experience description leads to a slightly lower confidence level.\"\n",
86
+ "}\n"
87
+ ]
88
+ }
89
+ ],
90
  "source": [
91
  "import asyncio\n",
92
  "\n",
src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,58 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1084\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1158\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
67
+ "{\n",
68
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
69
+ " \"confidence\": 0.3,\n",
70
+ " \"reasoning\": \"The specific work experience at Fidelity Investments as \\\"FFAS Corporate Finance\\\" does not provide enough detail about the nature of the investments or financial activities undertaken during that time. Without a description of the specific focus or asset class related to this role, it is difficult to classify it accurately. The lack of information leads to a low confidence level in making a definitive classification.\",\n",
71
+ " \"other_description\": null\n",
72
+ "}\n",
73
+ "{\n",
74
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
75
+ " \"confidence\": 0.3,\n",
76
+ " \"reasoning\": \"The specific work experience item at Fidelity Investments as \\\"PI Corporate Finance\\\" does not provide any details about the nature of the investments or financial activities undertaken in that role. Without specific information regarding the focus on asset classes or investment strategies, it is difficult to classify this experience accurately. The lack of a description or responsibilities limits the ability to determine a clear investing focus or asset class.\",\n",
77
+ " \"other_description\": null\n",
78
+ "}\n",
79
+ "{\n",
80
+ " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
81
+ " \"confidence\": 0.3,\n",
82
+ " \"reasoning\": \"The specific work experience at William Blair & Company is labeled as \\\"Investment Banking,\\\" but there is no detailed description of the responsibilities or types of transactions involved. Without specific information on whether the focus was on equity, debt, mergers, or acquisitions, it is difficult to classify this experience into one of the predefined investing focus or asset class categories. The lack of detail leads to uncertainty in classification.\",\n",
83
+ " \"other_description\": null\n",
84
+ "}\n",
85
+ "{\n",
86
+ " \"investing_focus_asset_class\": \"Private Equity / Buyouts\",\n",
87
+ " \"confidence\": 0.9,\n",
88
+ " \"reasoning\": \"The specific work experience at Accel-KKR indicates a focus on private equity investments, particularly in technology and software sectors. The description highlights typical transactions such as acquisitions, buyouts of divisions from public companies, and take-private transactions, which are all characteristic of private equity buyouts. This aligns well with the classification of PRIVATE_EQUITY_BUYOUTS. The high confidence level reflects the clear alignment of the job responsibilities with this asset class.\",\n",
89
+ " \"other_description\": null\n",
90
+ "}\n"
91
+ ]
92
+ }
93
+ ],
94
  "source": [
95
  "import asyncio\n",
96
  "\n",
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/eric_armagost.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,54 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/eric_armagost.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m87\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1080\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m88\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1154\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
67
+ "{\n",
68
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
69
+ " \"confidence\": 0.7,\n",
70
+ " \"reasoning\": \"The specific work experience item is at Fidelity Investments, a company clearly identified within the financial services sector. The title \\\"FFAS Corporate Finance\\\" suggests a focus on financial analysis and corporate finance activities, which aligns with the financial institutions category. However, the lack of detailed responsibilities in the description leads to a slightly lower confidence level.\"\n",
71
+ "}\n",
72
+ "{\n",
73
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
+ " \"confidence\": 0.8,\n",
75
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"PI Corporate Finance\\\" suggests involvement in financial activities related to private investments, which aligns with the financial institutions sector. While the description lacks detail, the company name and industry classification provide strong evidence for this classification.\"\n",
76
+ "}\n",
77
+ "{\n",
78
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
79
+ " \"confidence\": 0.8,\n",
80
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked in Investment Banking at William Blair & Company, which is categorized under Financial Services. This aligns with the focus on financial institutions, as investment banking is a core component of this sector. The absence of a detailed description does not detract significantly from the classification, as the job title and company industry provide clear context.\"\n",
81
+ "}\n",
82
+ "{\n",
83
+ " \"investing_focus_sector\": \"Technology / Software / TMT\",\n",
84
+ " \"confidence\": 0.9,\n",
85
+ " \"reasoning\": \"The specific work experience at Accel-KKR clearly indicates a focus on investing in software and technology-enabled services companies. The description highlights that the firm is dedicated exclusively to this sector, which aligns directly with the TECHNOLOGY_SOFTWARE_TMT category. The confidence level is high due to the explicit mention of the firm's focus area in both the resume and the work experience item.\"\n",
86
+ "}\n"
87
+ ]
88
+ }
89
+ ],
90
  "source": [
91
  "import asyncio\n",
92
  "\n",
src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import json\n",
10
  "import os\n",
@@ -22,7 +30,7 @@
22
  "print(os.getcwd())\n",
23
  "\n",
24
  "\n",
25
- "with open(\"../tests/test_data/hansae_catlett.json\") as f:\n",
26
  " data = json.load(f)\n",
27
  " # convert to linkedin profile\n",
28
  "\n",
@@ -31,9 +39,106 @@
31
  },
32
  {
33
  "cell_type": "code",
34
- "execution_count": null,
35
  "metadata": {},
36
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "source": [
38
  "import asyncio\n",
39
  "\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import json\n",
18
  "import os\n",
 
30
  "print(os.getcwd())\n",
31
  "\n",
32
  "\n",
33
+ "with open(\"../tests/test_data/sample_profiles/hansae_catlett.json\") as f:\n",
34
  " data = json.load(f)\n",
35
  " # convert to linkedin profile\n",
36
  "\n",
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:19:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:19:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1811\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m122\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m2005\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m134\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1816\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m142\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1816\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m139\u001b[0m\n",
67
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
68
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1811\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m142\u001b[0m\n",
69
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
70
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1812\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m147\u001b[0m\n",
71
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
72
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1847\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m119\u001b[0m\n",
73
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
74
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1813\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m155\u001b[0m\n",
75
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
76
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1814\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m165\u001b[0m\n",
77
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
78
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1815\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m182\u001b[0m\n",
79
+ "{\n",
80
+ " \"primary_job_type\": \"Full-time\",\n",
81
+ " \"secondary_job_type\": \"Investing\",\n",
82
+ " \"confidence\": 0.9,\n",
83
+ " \"reasoning\": \"The work experience as a Partner at HOF Capital is a full-time role in a venture capital firm, which aligns with the candidate's ongoing career in investing. Given that the candidate has been involved in venture capital and private equity roles, this position is classified as FULL_TIME. The secondary job type is classified as INVESTING, as the role directly involves investment activities typical of a venture capital firm. The candidate's previous experience as Vice President at Bessemer Venture Partners further supports this classification.\"\n",
84
+ "}\n",
85
+ "{\n",
86
+ " \"primary_job_type\": \"Full-time\",\n",
87
+ " \"secondary_job_type\": \"Investing\",\n",
88
+ " \"confidence\": 0.9,\n",
89
+ " \"reasoning\": \"The work experience item indicates that the candidate is a Co-Founder and General Partner at The MBA Fund, which is a venture capital firm. This role is ongoing and involves significant responsibilities, indicating it is a full-time position rather than an internship or part-time role. Therefore, it is classified as FULL_TIME. Additionally, since the role is directly related to venture capital and involves investing in startups, it fits the INVESTING category for the secondary job type. The candidate's extensive background in venture capital, as seen in their other roles, supports this classification.\"\n",
90
+ "}\n",
91
+ "{\n",
92
+ " \"primary_job_type\": \"Other\",\n",
93
+ " \"secondary_job_type\": \"Investing\",\n",
94
+ " \"confidence\": 0.9,\n",
95
+ " \"reasoning\": \"The work experience as a Board Observer at Rillavoice is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that this position is part of a broader involvement in venture capital and investment activities, it does not fit the criteria for a full-time job. Therefore, it is classified as OTHER for the primary job type. For the secondary job type, since the role involves oversight and advisory functions in a startup context, it aligns with the INVESTING category, as it is related to venture capital activities.\"\n",
96
+ "}\n",
97
+ "{\n",
98
+ " \"primary_job_type\": \"Full-time\",\n",
99
+ " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
100
+ " \"confidence\": 0.9,\n",
101
+ " \"reasoning\": \"The work experience as a Board Director at Luxury Presence is a long-term role from 2021 to 2024, indicating a significant commitment that aligns with a full-time position rather than a temporary or part-time role. Given that the candidate is involved in a board capacity, this suggests a leadership and strategic role rather than an advisory or internship position. For the secondary job type, the role of Board Director typically involves oversight and strategic decision-making, which aligns with CORPDEV_STRATEGY as it relates to corporate governance and strategic planning within the company.\"\n",
102
+ "}\n",
103
+ "{\n",
104
+ " \"primary_job_type\": \"Full-time\",\n",
105
+ " \"secondary_job_type\": \"Investing\",\n",
106
+ " \"confidence\": 0.9,\n",
107
+ " \"reasoning\": \"The work experience as an \\\"Investor\\\" at Spot AI from 2021 to 2024 indicates a professional investing role, which aligns with the secondary job type category of INVESTING. The candidate's full resume shows a strong background in venture capital and investment roles, including being a Vice President at Bessemer Venture Partners and a Co-Founder at The MBA Fund, both of which further support the classification as INVESTING. Since the role spans over three years and does not fit the criteria for an internship or other primary job types, it is classified as FULL_TIME for the primary job type.\"\n",
108
+ "}\n",
109
+ "{\n",
110
+ " \"primary_job_type\": \"Other\",\n",
111
+ " \"secondary_job_type\": \"Investing\",\n",
112
+ " \"confidence\": 0.9,\n",
113
+ " \"reasoning\": \"The work experience at Archy is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital as seen in their role as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund. The duration from 2022 to 2024 suggests a longer-term commitment, which is typical for investing roles rather than internships. The primary job type is classified as OTHER since there is no indication of regular employment or a formal structure like full-time or part-time; it appears to be more of an investment role without a traditional employment setup.\"\n",
114
+ "}\n",
115
+ "{\n",
116
+ " \"primary_job_type\": \"Full-time\",\n",
117
+ " \"secondary_job_type\": \"Investing\",\n",
118
+ " \"confidence\": 1.0,\n",
119
+ " \"reasoning\": \"The work experience as Vice President at Bessemer Venture Partners is classified as FULL_TIME because it is a full-time position that spans over four years, which aligns with the criteria for regular ongoing employment. The secondary job type is classified as INVESTING, as the role is within a venture capital firm, which involves professional investing activities. This is supported by the candidate's extensive background in venture capital and private equity, as indicated in both the resume and the specific work experience item.\"\n",
120
+ "}\n",
121
+ "{\n",
122
+ " \"primary_job_type\": \"Other\",\n",
123
+ " \"secondary_job_type\": \"Other\",\n",
124
+ " \"confidence\": 0.9,\n",
125
+ " \"reasoning\": \"The work experience as a Board Observer at VendorPM is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that the candidate has been in this role from 2021 to 2024, it suggests a part-time or advisory capacity rather than a full-time position. Therefore, the Primary Job Type is classified as OTHER, as it does not fit the other categories. For the Secondary Job Type, the role of Board Observer does not involve direct investment activities but rather oversight, which does not fit into the INVESTING category. Thus, it is classified as OTHER as well.\"\n",
126
+ "}\n",
127
+ "{\n",
128
+ " \"primary_job_type\": \"Other\",\n",
129
+ " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
130
+ " \"confidence\": 0.9,\n",
131
+ " \"reasoning\": \"The work experience as a Board Observer at MaintainX is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time role and does not indicate a regular employment setup, it does not fit the FULL_TIME category. The duration from 2021 to 2024 suggests a longer-term commitment, but it is not a traditional employment role, thus it is classified as OTHER for the primary job type. For the secondary job type, as a Board Observer, the role involves strategic oversight rather than direct investment activities, which aligns it more closely with the CORPDEV_STRATEGY category rather than INVESTING.\"\n",
132
+ "}\n",
133
+ "{\n",
134
+ " \"primary_job_type\": \"Other\",\n",
135
+ " \"secondary_job_type\": \"Investing\",\n",
136
+ " \"confidence\": 0.9,\n",
137
+ " \"reasoning\": \"The work experience at ServiceTitan is classified as an INVESTING role because the title \\\"Investor\\\" indicates a focus on investment activities, which aligns with the candidate's background in venture capital and investment roles as seen in their resume. The candidate has been involved in various investment capacities, including their role as Vice President at Bessemer Venture Partners and as a Co-Founder at The MBA Fund, both of which emphasize their expertise in investing. The duration of the role from 2021 to 2024 suggests a longer-term commitment, which is typical for professional investing roles rather than internships or part-time positions. For the Primary Job Type, since this role is not a full-time ongoing employment position but rather an investment role, it is classified as OTHER, as it does not fit the other primary categories.\"\n",
138
+ "}\n"
139
+ ]
140
+ }
141
+ ],
142
  "source": [
143
  "import asyncio\n",
144
  "\n",
src/vsp/app/main.py CHANGED
@@ -60,10 +60,17 @@ class ClassifiedWorkExperience(BaseModel):
60
 
61
  Attributes:
62
  position (Position): The original position item from the LinkedIn profile.
63
- work_experience_classification (WorkExperienceClassification): The general classification results for the work experience.
64
- investment_banking_classification (InvestmentBankingGroupClassification | None): The investment banking classification results, if applicable.
65
- investing_focus_asset_class_classification (InvestingFocusAssetClassClassification | None): The investing focus asset class classification results, if applicable.
66
- investing_focus_sector_classification (InvestingFocusSectorClassification | None): The investing focus sector classification results, if applicable.
 
 
 
 
 
 
 
67
  """
68
 
69
  position: Position
@@ -79,7 +86,8 @@ class LinkedinProfileClassificationResults(BaseModel):
79
 
80
  Attributes:
81
  classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
82
- classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work experience items.
 
83
  """
84
 
85
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
@@ -97,11 +105,13 @@ class VspDataEnrichment:
97
  education_classifier (EducationClassifier): Classifier for education items.
98
  work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
99
  investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
100
- investing_focus_asset_class_classifier (InvestingFocusAssetClassClassifier): Classifier for investing focus asset classes.
 
 
101
  investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
102
  """
103
 
104
- def __init__(self):
105
  """Initialize the VspDataEnrichment class with all required classifiers."""
106
  self._education_classifier = EducationClassifier()
107
  self._work_experience_classifier = WorkExperienceClassifier()
@@ -162,8 +172,10 @@ class VspDataEnrichment:
162
  classified_work_experience.investment_banking_classification = ib_classification
163
 
164
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
165
- asset_class_task = self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
166
- profile, position
 
 
167
  )
168
  sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
169
  profile, position
 
60
 
61
  Attributes:
62
  position (Position): The original position item from the LinkedIn profile.
63
+ work_experience_classification (WorkExperienceClassification): The general classification results
64
+ for the work experience.
65
+ investment_banking_classification (
66
+ InvestmentBankingGroupClassification | None
67
+ ): The investment banking classification results, if applicable.
68
+ investing_focus_asset_class_classification (
69
+ InvestingFocusAssetClassClassification | None
70
+ ): The investing focus asset class classification results, if applicable.
71
+ investing_focus_sector_classification (
72
+ InvestingFocusSectorClassification | None
73
+ ): The investing focus sector classification results, if applicable.
74
  """
75
 
76
  position: Position
 
86
 
87
  Attributes:
88
  classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
89
+ classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work
90
+ experience items.
91
  """
92
 
93
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
 
105
  education_classifier (EducationClassifier): Classifier for education items.
106
  work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
107
  investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
108
+ investing_focus_asset_class_classifier (
109
+ InvestingFocusAssetClassClassifier
110
+ ): Classifier for investing focus asset classes.
111
  investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
112
  """
113
 
114
+ def __init__(self) -> None:
115
  """Initialize the VspDataEnrichment class with all required classifiers."""
116
  self._education_classifier = EducationClassifier()
117
  self._work_experience_classifier = WorkExperienceClassifier()
 
172
  classified_work_experience.investment_banking_classification = ib_classification
173
 
174
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
175
+ asset_class_task = (
176
+ self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
177
+ profile, position
178
+ )
179
  )
180
  sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
181
  profile, position
tests/test_harness/enum_classifier_test.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any
5
 
6
  from pydantic import BaseModel
7
 
8
- from vsp.app.main import process_linkedin_profile
9
  from vsp.app.model.linkedin.linkedin_models import LinkedinProfile
10
  from vsp.app.model.vsp.vsp_models import VSPProfile
11
  from vsp.shared import logger_factory
@@ -45,7 +45,8 @@ def load_profiles() -> dict[str, tuple[LinkedinProfile, VSPProfile]]:
45
 
46
 
47
  async def compare_profiles(linkedin_profile: LinkedinProfile, classified_profile: VSPProfile) -> ComparisonResult:
48
- result = await process_linkedin_profile(linkedin_profile)
 
49
  comparisons = {}
50
  correct_enums = 0
51
  total_enums = 0
 
5
 
6
  from pydantic import BaseModel
7
 
8
+ from vsp.app.main import VspDataEnrichment
9
  from vsp.app.model.linkedin.linkedin_models import LinkedinProfile
10
  from vsp.app.model.vsp.vsp_models import VSPProfile
11
  from vsp.shared import logger_factory
 
45
 
46
 
47
  async def compare_profiles(linkedin_profile: LinkedinProfile, classified_profile: VSPProfile) -> ComparisonResult:
48
+ vsp_enrichment = VspDataEnrichment()
49
+ result = await vsp_enrichment.process_linkedin_profile(linkedin_profile)
50
  comparisons = {}
51
  correct_enums = 0
52
  total_enums = 0
tests/vsp/app/test_main.py CHANGED
@@ -20,7 +20,7 @@ from vsp.app.classifiers.work_experience.investment_banking_group_classifier imp
20
  InvestmentBankingGroup,
21
  InvestmentBankingGroupClassification,
22
  )
23
- from vsp.app.main import LinkedinProfileClassificationResults, process_linkedin_profile
24
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
25
 
26
 
@@ -107,7 +107,8 @@ async def test_process_linkedin_profile_comprehensive(sample_linkedin_profile):
107
  )
108
  )
109
 
110
- result = await process_linkedin_profile(sample_linkedin_profile)
 
111
 
112
  assert isinstance(result, LinkedinProfileClassificationResults)
113
  assert len(result.classified_educations) == 1
@@ -184,7 +185,8 @@ async def test_process_linkedin_profile_no_investing(sample_linkedin_profile):
184
  )
185
  )
186
 
187
- result = await process_linkedin_profile(sample_linkedin_profile)
 
188
 
189
  assert isinstance(result, LinkedinProfileClassificationResults)
190
  assert len(result.classified_educations) == 1
 
20
  InvestmentBankingGroup,
21
  InvestmentBankingGroupClassification,
22
  )
23
+ from vsp.app.main import LinkedinProfileClassificationResults, VspDataEnrichment
24
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
25
 
26
 
 
107
  )
108
  )
109
 
110
+ vsp_enrichment = VspDataEnrichment()
111
+ result = await vsp_enrichment.process_linkedin_profile(sample_linkedin_profile)
112
 
113
  assert isinstance(result, LinkedinProfileClassificationResults)
114
  assert len(result.classified_educations) == 1
 
185
  )
186
  )
187
 
188
+ vsp_enrichment = VspDataEnrichment()
189
+ result = await vsp_enrichment.process_linkedin_profile(sample_linkedin_profile)
190
 
191
  assert isinstance(result, LinkedinProfileClassificationResults)
192
  assert len(result.classified_educations) == 1