pquiggles committed on
Commit
0481dfc
·
2 Parent(s): 9347485 518f864

Merged 'main' into 'pquiggles/improvements' accepting 'main's changes

Browse files
src/notebooks/classifiers/work_experience/investment_banking_group_classifier.ipynb CHANGED
@@ -9,7 +9,7 @@
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
@@ -39,31 +39,45 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 3,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
50
- "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
51
- "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
52
- "\u001b[2m2024-09-16 15:16:21\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  "{\n",
54
  " \"investment_banking_group\": \"Private Funds\",\n",
55
  " \"confidence\": 0.9,\n",
56
  " \"reasoning\": \"The candidate's role as an Investment Professional at Accel-KKR, a private equity firm focused on technology investments, aligns closely with the PRIVATE_FUNDS group. Their work involves acquisitions, buyouts, and structured investments, which are typical activities in private equity. Additionally, the candidate's experience at Fidelity Investments in corporate finance suggests a strong background in financial transactions, further supporting this classification.\"\n",
57
  "}\n",
58
  "{\n",
59
- " \"investment_banking_group\": \"Generalist\",\n",
60
  " \"confidence\": 0.8,\n",
61
- " \"reasoning\": \"The candidate worked in the Investment Banking division at William Blair & Company, a firm known for providing a wide range of financial advisory services, which suggests a generalist role. The absence of specific details in the work experience description indicates that the candidate may have been involved in various transactions across different sectors rather than specializing in a particular area. Additionally, their subsequent role at Accel-KKR, a private equity firm focused on technology, further supports the idea of a generalist background prior to specializing.\"\n",
62
  "}\n",
63
  "{\n",
64
- " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
65
  " \"confidence\": 0.8,\n",
66
- " \"reasoning\": \"The candidate worked in the FFAS Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the focus of the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their education in Business Economics, supports this classification, although the lack of specific details in the job description leaves some uncertainty.\"\n",
67
  "}\n",
68
  "{\n",
69
  " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
 
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:49\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:50\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1125\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:51\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1052\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m104\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1052\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:52\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1051\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m108\u001b[0m\n",
67
  "{\n",
68
  " \"investment_banking_group\": \"Private Funds\",\n",
69
  " \"confidence\": 0.9,\n",
70
  " \"reasoning\": \"The candidate's role as an Investment Professional at Accel-KKR, a private equity firm focused on technology investments, aligns closely with the PRIVATE_FUNDS group. Their work involves acquisitions, buyouts, and structured investments, which are typical activities in private equity. Additionally, the candidate's experience at Fidelity Investments in corporate finance suggests a strong background in financial transactions, further supporting this classification.\"\n",
71
  "}\n",
72
  "{\n",
73
+ " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
  " \"confidence\": 0.8,\n",
75
+ " \"reasoning\": \"The candidate worked in the FFAS Corporate Finance division at Fidelity Investments, a major player in the financial services sector. This role likely involved financial advisory services related to financial institutions, aligning with the focus of the FINANCIAL_INSTITUTIONS group. The candidate's experience at Fidelity, combined with their education in Business Economics, supports this classification, although the lack of specific details in the job description leaves some uncertainty.\"\n",
76
  "}\n",
77
  "{\n",
78
+ " \"investment_banking_group\": \"Generalist\",\n",
79
  " \"confidence\": 0.8,\n",
80
+ " \"reasoning\": \"The candidate worked in the Investment Banking division at William Blair & Company, a firm known for providing a wide range of financial advisory services, which suggests a generalist role. The absence of specific details in the work experience description indicates that the candidate may have been involved in various transactions across different sectors rather than specializing in a particular area. Additionally, their subsequent role at Accel-KKR, a private equity firm focused on technology, further supports the idea of a generalist background prior to specializing.\"\n",
81
  "}\n",
82
  "{\n",
83
  " \"investment_banking_group\": \"Financial Institutions Group (FIG) / Fintech\",\n",
src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb CHANGED
@@ -2,14 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
@@ -39,31 +39,31 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 4,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
- "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
- "\u001b[2m2024-09-16 15:14:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
- "\u001b[2m2024-09-16 15:14:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
61
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1084\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
63
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
65
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
- "\u001b[2m2024-09-16 15:14:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1158\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
67
  "{\n",
68
  " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
69
  " \"confidence\": 0.3,\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:44\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:45\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1084\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:46\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1085\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1158\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
67
  "{\n",
68
  " \"investing_focus_asset_class\": \"Unclassifiable\",\n",
69
  " \"confidence\": 0.3,\n",
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb CHANGED
@@ -9,7 +9,7 @@
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
@@ -46,33 +46,33 @@
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
- "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
- "\u001b[2m2024-09-16 15:13:36\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
- "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
- "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
- "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
- "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
- "\u001b[2m2024-09-16 15:13:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
- "\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
- "\u001b[2m2024-09-16 15:13:38\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m88\u001b[0m\n",
61
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1080\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m93\u001b[0m\n",
63
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
65
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
- "\u001b[2m2024-09-16 15:13:40\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1154\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
67
  "{\n",
68
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
69
  " \"confidence\": 0.7,\n",
70
- " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"FFAS Corporate Finance\\\" suggests involvement in financial analysis and corporate finance activities, which aligns with the financial institutions sector. However, the lack of detailed responsibilities in the description leads to a slightly lower confidence level.\"\n",
71
  "}\n",
72
  "{\n",
73
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
  " \"confidence\": 0.8,\n",
75
- " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"PI Corporate Finance\\\" suggests involvement in financial activities, likely related to investment management or corporate finance within the financial sector. While the description lacks detail, the context of the company and the role strongly supports classification in the financial institutions sector.\"\n",
76
  "}\n",
77
  "{\n",
78
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
 
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
 
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:21:30\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:21:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:21:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m87\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1080\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m88\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1081\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m94\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:21:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1154\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m96\u001b[0m\n",
67
  "{\n",
68
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
69
  " \"confidence\": 0.7,\n",
70
+ " \"reasoning\": \"The specific work experience item is at Fidelity Investments, a company clearly identified within the financial services sector. The title \\\"FFAS Corporate Finance\\\" suggests a focus on financial analysis and corporate finance activities, which aligns with the financial institutions category. However, the lack of detailed responsibilities in the description leads to a slightly lower confidence level.\"\n",
71
  "}\n",
72
  "{\n",
73
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
74
  " \"confidence\": 0.8,\n",
75
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked at Fidelity Investments, a company clearly categorized under financial services. The title \\\"PI Corporate Finance\\\" suggests involvement in financial activities related to private investments, which aligns with the financial institutions sector. While the description lacks detail, the company name and industry classification provide strong evidence for this classification.\"\n",
76
  "}\n",
77
  "{\n",
78
  " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
src/notebooks/classifiers/work_experience/work_experience_classifier.ipynb CHANGED
@@ -2,14 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
- "/Users/paulquigley/ZSL_projects/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
@@ -39,82 +39,102 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 4,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
50
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
51
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
52
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
53
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
54
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
55
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
56
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
57
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
58
- "\u001b[2m2024-09-16 15:16:55\u001b[0m [\u001b[32m\u001b[1mdebug \u001b[0m] \u001b[1mLLM cache hit \u001b[0m\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "{\n",
60
  " \"primary_job_type\": \"Full-time\",\n",
61
  " \"secondary_job_type\": \"Investing\",\n",
62
  " \"confidence\": 0.9,\n",
63
- " \"reasoning\": \"The work experience as a Partner at HOF Capital is a full-time role in a venture capital firm, which aligns with the candidate's ongoing career in investing. Given that the candidate has been involved in venture capital and private equity roles, this position is classified as FULL_TIME. The secondary job type is classified as INVESTING, as the role directly involves investment activities typical of a venture capital firm. This classification is supported by the candidate's previous experience as Vice President at Bessemer Venture Partners and their role as Co-Founder at The MBA Fund, both of which are also in the investing domain.\"\n",
64
  "}\n",
65
  "{\n",
66
  " \"primary_job_type\": \"Full-time\",\n",
67
  " \"secondary_job_type\": \"Investing\",\n",
68
  " \"confidence\": 0.9,\n",
69
- " \"reasoning\": \"The work experience at The MBA Fund is classified as FULL_TIME because it is a co-founding role in a venture capital firm, indicating ongoing and regular employment. The responsibilities described involve significant engagement with startups and investment activities, which aligns with a full-time commitment. For the secondary job type, it is classified as INVESTING since the role involves backing and supporting startup founders, which is characteristic of venture capital activities. This is supported by the description of the firm and the candidate's role as a General Partner, which typically involves making investment decisions and managing a portfolio of investments.\"\n",
70
  "}\n",
71
  "{\n",
72
- " \"primary_job_type\": \"Full-time\",\n",
73
  " \"secondary_job_type\": \"Investing\",\n",
74
- " \"confidence\": 1.0,\n",
75
- " \"reasoning\": \"The work experience as Vice President at Bessemer Venture Partners is classified as FULL_TIME because it is a full-time position that spans over four years, which aligns with the criteria for regular ongoing employment. The secondary job type is classified as INVESTING, as the role is within a venture capital firm, which involves professional investing activities. This is supported by the candidate's extensive background in venture capital and private equity, as indicated in both the resume and the specific work experience item.\"\n",
76
  "}\n",
77
  "{\n",
78
- " \"primary_job_type\": \"Other\",\n",
79
- " \"secondary_job_type\": \"Other\",\n",
80
  " \"confidence\": 0.9,\n",
81
- " \"reasoning\": \"The work experience as a Board Observer at Rillavoice is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time employment role and does not fit the criteria for an internship, it is classified as OTHER for the primary job type. For the secondary job type, since the role involves oversight and strategic input in a software company, it does not fit into the categories of investing, banking, or consulting, thus it is classified as OTHER as well.\"\n",
82
  "}\n",
83
  "{\n",
84
- " \"primary_job_type\": \"Other\",\n",
85
  " \"secondary_job_type\": \"Investing\",\n",
86
  " \"confidence\": 0.9,\n",
87
- " \"reasoning\": \"The work experience at Archy is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital as seen in their role as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund. The duration from 2022 to 2024 suggests a longer-term commitment, which is typical for investing roles rather than internships. The primary job type is classified as OTHER since there is no indication of full-time employment or internship status, and it does not fit into the other primary categories.\"\n",
88
  "}\n",
89
  "{\n",
90
- " \"primary_job_type\": \"Advisory / Board / Independent Investor\",\n",
91
- " \"secondary_job_type\": \"Other\",\n",
92
  " \"confidence\": 0.9,\n",
93
- " \"reasoning\": \"The work experience as a Board Observer at MaintainX is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time role and does not indicate a regular employment setup, it does not fit the FULL_TIME category. The duration from 2021 to 2024 suggests a longer-term commitment, but the nature of the role indicates it is more advisory in nature. Therefore, I classify it as ADVISORY_BOARD_INVESTOR for the primary job type. For the secondary job type, since the role involves oversight and strategic input rather than direct investment activities, it does not fit into the INVESTING category. Instead, it aligns more closely with the OTHER category, as it does not fit the other defined roles.\"\n",
94
  "}\n",
95
  "{\n",
96
  " \"primary_job_type\": \"Full-time\",\n",
97
  " \"secondary_job_type\": \"Investing\",\n",
98
- " \"confidence\": 0.9,\n",
99
- " \"reasoning\": \"The work experience at Spot AI is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital and investment roles as seen in their resume. The duration from 2021 to 2024 suggests a long-term commitment, which rules out the possibility of it being an internship. The primary job type is classified as FULL_TIME since the role is ongoing and likely compensated, fitting the criteria for regular employment.\"\n",
100
  "}\n",
101
  "{\n",
102
  " \"primary_job_type\": \"Other\",\n",
103
- " \"secondary_job_type\": \"Entrepreneur / Founder\",\n",
104
  " \"confidence\": 0.9,\n",
105
- " \"reasoning\": \"The work experience as a Board Observer at VendorPM is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that the candidate has been involved in this role from 2021 to 2024, it suggests a part-time or advisory capacity rather than a full-time position. Therefore, the Primary Job Type is classified as OTHER, as it does not fit the other categories. For the Secondary Job Type, this role is best classified as ENTREPRENEUR_FOUNDER since it involves oversight and advisory functions in a startup environment, which is common for board observer roles in venture-backed companies.\"\n",
106
  "}\n",
107
  "{\n",
108
- " \"primary_job_type\": \"Full-time\",\n",
109
  " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
110
  " \"confidence\": 0.9,\n",
111
- " \"reasoning\": \"The work experience as a Board Director at Luxury Presence is a long-term role from 2021 to 2024, indicating a significant commitment that aligns with a full-time position rather than a temporary or part-time role. Given that this position involves governance and oversight, it does not fit the criteria for an internship or extracurricular activity. In terms of secondary job type, the role of Board Director typically involves strategic decision-making and oversight, which aligns with CORPDEV_STRATEGY as it is a corporate governance role rather than a direct investment role.\"\n",
112
  "}\n",
113
  "{\n",
114
  " \"primary_job_type\": \"Other\",\n",
115
  " \"secondary_job_type\": \"Investing\",\n",
116
  " \"confidence\": 0.9,\n",
117
- " \"reasoning\": \"The work experience at ServiceTitan is classified as an INVESTING role because the title \\\"Investor\\\" indicates a focus on investment activities, which aligns with the candidate's background in venture capital and investment roles as seen in their resume. The candidate has been involved in various investment capacities, including their role as Vice President at Bessemer Venture Partners and as a Co-Founder at The MBA Fund, both of which emphasize their expertise in investing. The duration of the role from 2021 to 2024 suggests a longer-term commitment, which is typical for professional investing roles rather than internships or part-time positions. For the Primary Job Type, since this role is not a full-time position but rather an investment role, it is classified as OTHER, as it does not fit the other primary categories like FULL_TIME or INTERNSHIP.\"\n",
118
  "}\n"
119
  ]
120
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
13
  ]
14
  }
15
  ],
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
50
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
51
+ "\u001b[2m2024-09-16 15:19:55\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
52
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
53
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
54
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
55
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
56
+ "\u001b[2m2024-09-16 15:19:56\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
57
+ "\u001b[2m2024-09-16 15:19:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
58
+ "\u001b[2m2024-09-16 15:19:57\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/rapidapi/linkedin\u001b[0m\n",
59
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1811\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m122\u001b[0m\n",
61
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m2005\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m134\u001b[0m\n",
63
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
64
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1816\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m142\u001b[0m\n",
65
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
66
+ "\u001b[2m2024-09-16 15:19:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1816\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m139\u001b[0m\n",
67
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
68
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1811\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m142\u001b[0m\n",
69
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
70
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1812\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m147\u001b[0m\n",
71
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
72
+ "\u001b[2m2024-09-16 15:19:59\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1847\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m119\u001b[0m\n",
73
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
74
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1813\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m155\u001b[0m\n",
75
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
76
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1814\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m165\u001b[0m\n",
77
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
78
+ "\u001b[2m2024-09-16 15:20:00\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1815\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m182\u001b[0m\n",
79
  "{\n",
80
  " \"primary_job_type\": \"Full-time\",\n",
81
  " \"secondary_job_type\": \"Investing\",\n",
82
  " \"confidence\": 0.9,\n",
83
+ " \"reasoning\": \"The work experience as a Partner at HOF Capital is a full-time role in a venture capital firm, which aligns with the candidate's ongoing career in investing. Given that the candidate has been involved in venture capital and private equity roles, this position is classified as FULL_TIME. The secondary job type is classified as INVESTING, as the role directly involves investment activities typical of a venture capital firm. The candidate's previous experience as Vice President at Bessemer Venture Partners further supports this classification.\"\n",
84
  "}\n",
85
  "{\n",
86
  " \"primary_job_type\": \"Full-time\",\n",
87
  " \"secondary_job_type\": \"Investing\",\n",
88
  " \"confidence\": 0.9,\n",
89
+ " \"reasoning\": \"The work experience item indicates that the candidate is a Co-Founder and General Partner at The MBA Fund, which is a venture capital firm. This role is ongoing and involves significant responsibilities, indicating it is a full-time position rather than an internship or part-time role. Therefore, it is classified as FULL_TIME. Additionally, since the role is directly related to venture capital and involves investing in startups, it fits the INVESTING category for the secondary job type. The candidate's extensive background in venture capital, as seen in their other roles, supports this classification.\"\n",
90
  "}\n",
91
  "{\n",
92
+ " \"primary_job_type\": \"Other\",\n",
93
  " \"secondary_job_type\": \"Investing\",\n",
94
+ " \"confidence\": 0.9,\n",
95
+ " \"reasoning\": \"The work experience as a Board Observer at Rillavoice is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that this position is part of a broader involvement in venture capital and investment activities, it does not fit the criteria for a full-time job. Therefore, it is classified as OTHER for the primary job type. For the secondary job type, since the role involves oversight and advisory functions in a startup context, it aligns with the INVESTING category, as it is related to venture capital activities.\"\n",
96
  "}\n",
97
  "{\n",
98
+ " \"primary_job_type\": \"Full-time\",\n",
99
+ " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
100
  " \"confidence\": 0.9,\n",
101
+ " \"reasoning\": \"The work experience as a Board Director at Luxury Presence is a long-term role from 2021 to 2024, indicating a significant commitment that aligns with a full-time position rather than a temporary or part-time role. Given that the candidate is involved in a board capacity, this suggests a leadership and strategic role rather than an advisory or internship position. For the secondary job type, the role of Board Director typically involves oversight and strategic decision-making, which aligns with CORPDEV_STRATEGY as it relates to corporate governance and strategic planning within the company.\"\n",
102
  "}\n",
103
  "{\n",
104
+ " \"primary_job_type\": \"Full-time\",\n",
105
  " \"secondary_job_type\": \"Investing\",\n",
106
  " \"confidence\": 0.9,\n",
107
+ " \"reasoning\": \"The work experience as an \\\"Investor\\\" at Spot AI from 2021 to 2024 indicates a professional investing role, which aligns with the secondary job type category of INVESTING. The candidate's full resume shows a strong background in venture capital and investment roles, including being a Vice President at Bessemer Venture Partners and a Co-Founder at The MBA Fund, both of which further support the classification as INVESTING. Since the role spans over three years and does not fit the criteria for an internship or other primary job types, it is classified as FULL_TIME for the primary job type.\"\n",
108
  "}\n",
109
  "{\n",
110
+ " \"primary_job_type\": \"Other\",\n",
111
+ " \"secondary_job_type\": \"Investing\",\n",
112
  " \"confidence\": 0.9,\n",
113
+ " \"reasoning\": \"The work experience at Archy is classified as an INVESTING role because the title \\\"Investor\\\" indicates a professional investing position, which aligns with the candidate's experience in venture capital as seen in their role as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund. The duration from 2022 to 2024 suggests a longer-term commitment, which is typical for investing roles rather than internships. The primary job type is classified as OTHER since there is no indication of regular employment or a formal structure like full-time or part-time; it appears to be more of an investment role without a traditional employment setup.\"\n",
114
  "}\n",
115
  "{\n",
116
  " \"primary_job_type\": \"Full-time\",\n",
117
  " \"secondary_job_type\": \"Investing\",\n",
118
+ " \"confidence\": 1.0,\n",
119
+ " \"reasoning\": \"The work experience as Vice President at Bessemer Venture Partners is classified as FULL_TIME because it is a full-time position that spans over four years, which aligns with the criteria for regular ongoing employment. The secondary job type is classified as INVESTING, as the role is within a venture capital firm, which involves professional investing activities. This is supported by the candidate's extensive background in venture capital and private equity, as indicated in both the resume and the specific work experience item.\"\n",
120
  "}\n",
121
  "{\n",
122
  " \"primary_job_type\": \"Other\",\n",
123
+ " \"secondary_job_type\": \"Other\",\n",
124
  " \"confidence\": 0.9,\n",
125
+ " \"reasoning\": \"The work experience as a Board Observer at VendorPM is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities rather than a full-time employment setup. Given that the candidate has been in this role from 2021 to 2024, it suggests a part-time or advisory capacity rather than a full-time position. Therefore, the Primary Job Type is classified as OTHER, as it does not fit the other categories. For the Secondary Job Type, the role of Board Observer does not involve direct investment activities but rather oversight, which does not fit into the INVESTING category. Thus, it is classified as OTHER as well.\"\n",
126
  "}\n",
127
  "{\n",
128
+ " \"primary_job_type\": \"Other\",\n",
129
  " \"secondary_job_type\": \"CorpDev / Strategy\",\n",
130
  " \"confidence\": 0.9,\n",
131
+ " \"reasoning\": \"The work experience as a Board Observer at MaintainX is a role that typically involves providing guidance and oversight to the company's management team, which aligns with advisory responsibilities. Given that this position is not a full-time role and does not indicate a regular employment setup, it does not fit the FULL_TIME category. The duration from 2021 to 2024 suggests a longer-term commitment, but it is not a traditional employment role, thus it is classified as OTHER for the primary job type. For the secondary job type, as a Board Observer, the role involves strategic oversight rather than direct investment activities, which aligns it more closely with the CORPDEV_STRATEGY category rather than INVESTING.\"\n",
132
  "}\n",
133
  "{\n",
134
  " \"primary_job_type\": \"Other\",\n",
135
  " \"secondary_job_type\": \"Investing\",\n",
136
  " \"confidence\": 0.9,\n",
137
+ " \"reasoning\": \"The work experience at ServiceTitan is classified as an INVESTING role because the title \\\"Investor\\\" indicates a focus on investment activities, which aligns with the candidate's background in venture capital and investment roles as seen in their resume. The candidate has been involved in various investment capacities, including their role as Vice President at Bessemer Venture Partners and as a Co-Founder at The MBA Fund, both of which emphasize their expertise in investing. The duration of the role from 2021 to 2024 suggests a longer-term commitment, which is typical for professional investing roles rather than internships or part-time positions. For the Primary Job Type, since this role is not a full-time ongoing employment position but rather an investment role, it is classified as OTHER, as it does not fit the other primary categories.\"\n",
138
  "}\n"
139
  ]
140
  }
src/vsp/app/main.py CHANGED
@@ -17,18 +17,12 @@ Usage:
17
  """
18
 
19
  import asyncio
20
- from typing import Sequence, Mapping, List
21
-
22
- import calendar
23
- from datetime import date
24
-
25
 
26
  from pydantic import BaseModel, Field
27
 
28
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
29
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
30
- PrimaryJobType,
31
- SecondaryJobType,
32
  WorkExperienceClassification,
33
  WorkExperienceClassifier,
34
  )
@@ -47,8 +41,6 @@ from vsp.app.classifiers.work_experience.investment_banking_group_classifier imp
47
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
48
 
49
 
50
- from collections import defaultdict
51
-
52
  class ClassifiedEducation(BaseModel):
53
  """
54
  Represents a classified education item from a LinkedIn profile.
@@ -68,10 +60,17 @@ class ClassifiedWorkExperience(BaseModel):
68
 
69
  Attributes:
70
  position (Position): The original position item from the LinkedIn profile.
71
- work_experience_classification (WorkExperienceClassification): The general classification results for the work experience.
72
- investment_banking_classification (InvestmentBankingGroupClassification | None): The investment banking classification results, if applicable.
73
- investing_focus_asset_class_classification (InvestingFocusAssetClassClassification | None): The investing focus asset class classification results, if applicable.
74
- investing_focus_sector_classification (InvestingFocusSectorClassification | None): The investing focus sector classification results, if applicable.
 
 
 
 
 
 
 
75
  """
76
 
77
  position: Position
@@ -87,13 +86,12 @@ class LinkedinProfileClassificationResults(BaseModel):
87
 
88
  Attributes:
89
  classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
90
- classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work experience items.
 
91
  """
92
 
93
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
94
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
95
- full_time_work_experience_years: float = Field(default=0.0)
96
- full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)
97
 
98
 
99
  class VspDataEnrichment:
@@ -107,11 +105,13 @@ class VspDataEnrichment:
107
  education_classifier (EducationClassifier): Classifier for education items.
108
  work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
109
  investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
110
- investing_focus_asset_class_classifier (InvestingFocusAssetClassClassifier): Classifier for investing focus asset classes.
 
 
111
  investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
112
  """
113
 
114
- def __init__(self):
115
  """Initialize the VspDataEnrichment class with all required classifiers."""
116
  self._education_classifier = EducationClassifier()
117
  self._work_experience_classifier = WorkExperienceClassifier()
@@ -119,83 +119,6 @@ class VspDataEnrichment:
119
  self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
120
  self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
121
 
122
-
123
- def estimate_full_time_experience_by_secondary_job_type(self, classified_work_experiences: List[ClassifiedWorkExperience]) -> Mapping[SecondaryJobType, float]:
124
- # Define current date
125
- current_date = date(2024, 9, 18)
126
-
127
- # List to store all events (start or end of intervals)
128
- events = []
129
-
130
- # Set to store all observed SecondaryJobTypes
131
- observed_secondary_job_types = set()
132
-
133
- for cwe in classified_work_experiences:
134
- classification = cwe.work_experience_classification.primary_job_type
135
- secondary_job_type = cwe.work_experience_classification.secondary_job_type
136
-
137
- if classification == PrimaryJobType.FULL_TIME and secondary_job_type:
138
- # Normalize start date
139
- start = cwe.position.start
140
- if not start or not start.year:
141
- continue
142
- start_year = start.year
143
- start_month = start.month if start.month else 1
144
- start_day = start.day if start.day else 1
145
- start_date = date(start_year, start_month, start_day)
146
-
147
- # Normalize end date
148
- end = cwe.position.end
149
- if end is None:
150
- end_date = current_date
151
- else:
152
- if not end.year:
153
- continue
154
- end_year = end.year
155
- end_month = end.month if end.month else 12
156
- if end.day:
157
- end_day = end.day
158
- else:
159
- # Get last day of the month
160
- _, end_day = calendar.monthrange(end_year, end_month)
161
- end_date = date(end_year, end_month, end_day)
162
-
163
- if start_date > end_date:
164
- continue # Skip invalid intervals
165
-
166
- # Add events for sweep-line algorithm
167
- events.append((start_date, 'start', secondary_job_type))
168
- events.append((end_date, 'end', secondary_job_type))
169
-
170
- observed_secondary_job_types.add(secondary_job_type)
171
-
172
- # Sort events by date
173
- events.sort(key=lambda x: x[0])
174
-
175
- active_secondary_job_types = set()
176
- last_date = None
177
- durations = defaultdict(int) # in days
178
-
179
- for event_date, event_type, secondary_job_type in events:
180
- if last_date is not None and event_date > last_date:
181
- interval_duration = (event_date - last_date).days
182
- # Distribute the interval_duration among active_secondary_job_types
183
- for active_type in active_secondary_job_types:
184
- durations[active_type] += interval_duration
185
-
186
- if event_type == 'start':
187
- active_secondary_job_types.add(secondary_job_type)
188
- elif event_type == 'end':
189
- active_secondary_job_types.discard(secondary_job_type)
190
-
191
- last_date = event_date
192
-
193
- # Convert durations from days to years
194
- durations_in_years = {stype: round(days / 365.25, 2) for stype, days in durations.items()}
195
-
196
- return durations_in_years
197
-
198
-
199
  async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
200
  """
201
  Process a LinkedIn profile and classify its education and work experiences.
@@ -242,14 +165,13 @@ class VspDataEnrichment:
242
  work_classification.primary_job_type.INTERNSHIP,
243
  work_classification.primary_job_type.EXTRACURRICULAR,
244
  }:
245
-
246
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
247
  ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
248
  profile, position
249
  )
250
  classified_work_experience.investment_banking_classification = ib_classification
251
 
252
- if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING and not work_classification.primary_job_type == work_classification.primary_job_type.ADVISORY_BOARD_INVESTOR:
253
  asset_class_task = (
254
  self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
255
  profile, position
@@ -266,11 +188,8 @@ class VspDataEnrichment:
266
 
267
  classified_work_experiences.append(classified_work_experience)
268
 
269
-
270
- experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
271
- total_work_experience = sum(experience_by_job_type.values())
272
  return LinkedinProfileClassificationResults(
273
- classified_educations=classified_educations, classified_work_experiences=classified_work_experiences, full_time_work_experience_years=total_work_experience, full_time_work_experience_by_secondary=experience_by_job_type
274
  )
275
 
276
 
 
17
  """
18
 
19
  import asyncio
20
+ from typing import Sequence
 
 
 
 
21
 
22
  from pydantic import BaseModel, Field
23
 
24
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
25
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
 
 
26
  WorkExperienceClassification,
27
  WorkExperienceClassifier,
28
  )
 
41
  from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position
42
 
43
 
 
 
44
  class ClassifiedEducation(BaseModel):
45
  """
46
  Represents a classified education item from a LinkedIn profile.
 
60
 
61
  Attributes:
62
  position (Position): The original position item from the LinkedIn profile.
63
+ work_experience_classification (WorkExperienceClassification): The general classification results
64
+ for the work experience.
65
+ investment_banking_classification (
66
+ InvestmentBankingGroupClassification | None
67
+ ): The investment banking classification results, if applicable.
68
+ investing_focus_asset_class_classification (
69
+ InvestingFocusAssetClassClassification | None
70
+ ): The investing focus asset class classification results, if applicable.
71
+ investing_focus_sector_classification (
72
+ InvestingFocusSectorClassification | None
73
+ ): The investing focus sector classification results, if applicable.
74
  """
75
 
76
  position: Position
 
86
 
87
  Attributes:
88
  classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
89
+ classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work
90
+ experience items.
91
  """
92
 
93
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
94
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
 
 
95
 
96
 
97
  class VspDataEnrichment:
 
105
  education_classifier (EducationClassifier): Classifier for education items.
106
  work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
107
  investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
108
+ investing_focus_asset_class_classifier (
109
+ InvestingFocusAssetClassClassifier
110
+ ): Classifier for investing focus asset classes.
111
  investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
112
  """
113
 
114
+ def __init__(self) -> None:
115
  """Initialize the VspDataEnrichment class with all required classifiers."""
116
  self._education_classifier = EducationClassifier()
117
  self._work_experience_classifier = WorkExperienceClassifier()
 
119
  self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
120
  self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
123
  """
124
  Process a LinkedIn profile and classify its education and work experiences.
 
165
  work_classification.primary_job_type.INTERNSHIP,
166
  work_classification.primary_job_type.EXTRACURRICULAR,
167
  }:
 
168
  if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
169
  ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
170
  profile, position
171
  )
172
  classified_work_experience.investment_banking_classification = ib_classification
173
 
174
+ if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
175
  asset_class_task = (
176
  self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
177
  profile, position
 
188
 
189
  classified_work_experiences.append(classified_work_experience)
190
 
 
 
 
191
  return LinkedinProfileClassificationResults(
192
+ classified_educations=classified_educations, classified_work_experiences=classified_work_experiences
193
  )
194
 
195
 
tests/test_harness/enum_classifier_test.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any
5
 
6
  from pydantic import BaseModel
7
 
8
- from vsp.app.main import process_linkedin_profile
9
  from vsp.app.model.linkedin.linkedin_models import LinkedinProfile
10
  from vsp.app.model.vsp.vsp_models import VSPProfile
11
  from vsp.shared import logger_factory
@@ -45,7 +45,8 @@ def load_profiles() -> dict[str, tuple[LinkedinProfile, VSPProfile]]:
45
 
46
 
47
  async def compare_profiles(linkedin_profile: LinkedinProfile, classified_profile: VSPProfile) -> ComparisonResult:
48
- result = await process_linkedin_profile(linkedin_profile)
 
49
  comparisons = {}
50
  correct_enums = 0
51
  total_enums = 0
 
5
 
6
  from pydantic import BaseModel
7
 
8
+ from vsp.app.main import VspDataEnrichment
9
  from vsp.app.model.linkedin.linkedin_models import LinkedinProfile
10
  from vsp.app.model.vsp.vsp_models import VSPProfile
11
  from vsp.shared import logger_factory
 
45
 
46
 
47
  async def compare_profiles(linkedin_profile: LinkedinProfile, classified_profile: VSPProfile) -> ComparisonResult:
48
+ vsp_enrichment = VspDataEnrichment()
49
+ result = await vsp_enrichment.process_linkedin_profile(linkedin_profile)
50
  comparisons = {}
51
  correct_enums = 0
52
  total_enums = 0