navkast committed on
Commit
ad2d836
·
unverified ·
1 Parent(s): 324115d

Make main.py into a proper entrypoint (#10)

Browse files
Files changed (1) hide show
  1. src/vsp/app/main.py +144 -75
src/vsp/app/main.py CHANGED
@@ -1,15 +1,28 @@
1
- # src/vsp/app/main.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import asyncio
4
- import json
5
  from typing import Sequence
6
 
7
  from pydantic import BaseModel, Field
8
 
9
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
10
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
11
- PrimaryJobType,
12
- SecondaryJobType,
13
  WorkExperienceClassification,
14
  WorkExperienceClassifier,
15
  )
@@ -29,14 +42,29 @@ from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, P
29
 
30
 
31
  class ClassifiedEducation(BaseModel):
32
- """Represents a classified education item."""
 
 
 
 
 
 
33
 
34
  education: Education
35
  classification: EducationClassification
36
 
37
 
38
  class ClassifiedWorkExperience(BaseModel):
39
- """Represents a classified work experience item."""
 
 
 
 
 
 
 
 
 
40
 
41
  position: Position
42
  work_experience_classification: WorkExperienceClassification
@@ -46,91 +74,132 @@ class ClassifiedWorkExperience(BaseModel):
46
 
47
 
48
  class LinkedinProfileClassificationResults(BaseModel):
49
- """Represents the classification results for a LinkedIn profile."""
 
 
 
 
 
 
50
 
51
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
52
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
53
 
54
 
55
- async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
56
  """
57
- Process a LinkedIn profile and classify its education and work experiences.
58
 
59
- This function maintains the original order of educations and work experiences
60
- from the input profile while performing asynchronous classification tasks.
61
 
62
- Args:
63
- profile (LinkedinProfile): The LinkedIn profile to process.
64
-
65
- Returns:
66
- LinkedinProfileClassificationResults: The classification results for the profile.
 
67
  """
68
- education_classifier = EducationClassifier()
69
- work_experience_classifier = WorkExperienceClassifier()
70
- investment_banking_classifier = InvestmentBankingGroupClassifier()
71
- investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
72
- investing_focus_sector_classifier = InvestingFocusSectorClassifier()
73
-
74
- # Create tasks for education classification
75
- education_tasks = {
76
- education: education_classifier.classify_education(profile, education) for education in profile.educations
77
- }
78
-
79
- # Create tasks for work experience classification
80
- work_experience_tasks = {
81
- position: work_experience_classifier.classify_work_experience(profile, position)
82
- for position in profile.positions
83
- }
84
-
85
- # Wait for all education and work experience classifications to complete
86
- education_results = await asyncio.gather(*education_tasks.values())
87
- work_experience_results = await asyncio.gather(*work_experience_tasks.values())
88
-
89
- # Create ClassifiedEducation objects in the original order
90
- classified_educations = [
91
- ClassifiedEducation(education=education, classification=classification)
92
- for education, classification in zip(profile.educations, education_results)
93
- ]
94
-
95
- # Process work experiences and create ClassifiedWorkExperience objects
96
- classified_work_experiences = []
97
- for position, work_classification in zip(profile.positions, work_experience_results):
98
- classified_work_experience = ClassifiedWorkExperience(
99
- position=position, work_experience_classification=work_classification
100
- )
101
-
102
- if work_classification.primary_job_type not in {PrimaryJobType.INTERNSHIP, PrimaryJobType.EXTRACURRICULAR}:
103
- if work_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING:
104
- ib_classification = await investment_banking_classifier.classify_investment_banking_group(
105
- profile, position
106
- )
107
- classified_work_experience.investment_banking_classification = ib_classification
108
-
109
- if work_classification.secondary_job_type == SecondaryJobType.INVESTING:
110
- asset_class_task = investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
111
- profile, position
112
- )
113
- sector_task = investing_focus_sector_classifier.classify_investing_focus_sector(profile, position)
114
-
115
- asset_class_result, sector_result = await asyncio.gather(asset_class_task, sector_task)
116
-
117
- classified_work_experience.investing_focus_asset_class_classification = asset_class_result
118
- classified_work_experience.investing_focus_sector_classification = sector_result
119
 
120
- classified_work_experiences.append(classified_work_experience)
121
-
122
- return LinkedinProfileClassificationResults(
123
- classified_educations=classified_educations, classified_work_experiences=classified_work_experiences
124
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
 
127
  async def main() -> None:
128
  """
129
- Main function to demonstrate the usage of process_linkedin_profile.
 
 
 
130
  """
 
 
 
131
  with open("tests/test_data/sample_profiles/eric_armagost.json") as f:
132
- profile = LinkedinProfile.model_validate(json.load(f))
133
- results = await process_linkedin_profile(profile)
 
 
 
 
 
 
134
  print(results.model_dump_json(indent=2))
135
 
136
 
 
1
+ """
2
+ main.py
3
+
4
+ This module serves as the main executable file entrypoint for the VSP Data Enrichment project.
5
+ It provides functionality to process LinkedIn profiles and classify various aspects of a person's
6
+ educational and professional background.
7
+
8
+ The main class, VspDataEnrichment, encapsulates all the necessary classifiers and methods
9
+ to perform a comprehensive analysis of a LinkedIn profile.
10
+
11
+ Usage:
12
+ from vsp.app.main import VspDataEnrichment
13
+
14
+ vsp_enrichment = VspDataEnrichment()
15
+ results = await vsp_enrichment.process_linkedin_profile(linkedin_profile)
16
+
17
+ """
18
 
19
  import asyncio
 
20
  from typing import Sequence
21
 
22
  from pydantic import BaseModel, Field
23
 
24
  from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
25
  from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
 
 
26
  WorkExperienceClassification,
27
  WorkExperienceClassifier,
28
  )
 
42
 
43
 
44
  class ClassifiedEducation(BaseModel):
45
+ """
46
+ Represents a classified education item from a LinkedIn profile.
47
+
48
+ Attributes:
49
+ education (Education): The original education item from the LinkedIn profile.
50
+ classification (EducationClassification): The classification results for the education item.
51
+ """
52
 
53
  education: Education
54
  classification: EducationClassification
55
 
56
 
57
  class ClassifiedWorkExperience(BaseModel):
58
+ """
59
+ Represents a classified work experience item from a LinkedIn profile.
60
+
61
+ Attributes:
62
+ position (Position): The original position item from the LinkedIn profile.
63
+ work_experience_classification (WorkExperienceClassification): The general classification results for the work experience.
64
+ investment_banking_classification (InvestmentBankingGroupClassification | None): The investment banking classification results, if applicable.
65
+ investing_focus_asset_class_classification (InvestingFocusAssetClassClassification | None): The investing focus asset class classification results, if applicable.
66
+ investing_focus_sector_classification (InvestingFocusSectorClassification | None): The investing focus sector classification results, if applicable.
67
+ """
68
 
69
  position: Position
70
  work_experience_classification: WorkExperienceClassification
 
74
 
75
 
76
  class LinkedinProfileClassificationResults(BaseModel):
77
+ """
78
+ Represents the complete classification results for a LinkedIn profile.
79
+
80
+ Attributes:
81
+ classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
82
+ classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work experience items.
83
+ """
84
 
85
  classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
86
  classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
87
 
88
 
89
+ class VspDataEnrichment:
90
  """
91
+ Main class for the VSP Data Enrichment project.
92
 
93
+ This class encapsulates all the necessary classifiers and methods to process
94
+ and enrich LinkedIn profile data with various classifications.
95
 
96
+ Attributes:
97
+ education_classifier (EducationClassifier): Classifier for education items.
98
+ work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
99
+ investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
100
+ investing_focus_asset_class_classifier (InvestingFocusAssetClassClassifier): Classifier for investing focus asset classes.
101
+ investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
102
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ def __init__(self):
105
+ """Initialize the VspDataEnrichment class with all required classifiers."""
106
+ self._education_classifier = EducationClassifier()
107
+ self._work_experience_classifier = WorkExperienceClassifier()
108
+ self._investment_banking_classifier = InvestmentBankingGroupClassifier()
109
+ self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
110
+ self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()
111
+
112
+ async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
113
+ """
114
+ Process a LinkedIn profile and classify its education and work experiences.
115
+
116
+ This method maintains the original order of educations and work experiences
117
+ from the input profile while performing asynchronous classification tasks.
118
+
119
+ Args:
120
+ profile (LinkedinProfile): The LinkedIn profile to process.
121
+
122
+ Returns:
123
+ LinkedinProfileClassificationResults: The comprehensive classification results for the profile.
124
+ """
125
+ # Create tasks for education classification
126
+ education_tasks = {
127
+ education: self._education_classifier.classify_education(profile, education)
128
+ for education in profile.educations
129
+ }
130
+
131
+ # Create tasks for work experience classification
132
+ work_experience_tasks = {
133
+ position: self._work_experience_classifier.classify_work_experience(profile, position)
134
+ for position in profile.positions
135
+ }
136
+
137
+ # Wait for all education and work experience classifications to complete
138
+ education_results = await asyncio.gather(*education_tasks.values())
139
+ work_experience_results = await asyncio.gather(*work_experience_tasks.values())
140
+
141
+ # Create ClassifiedEducation objects in the original order
142
+ classified_educations = [
143
+ ClassifiedEducation(education=education, classification=classification)
144
+ for education, classification in zip(profile.educations, education_results)
145
+ ]
146
+
147
+ # Process work experiences and create ClassifiedWorkExperience objects
148
+ classified_work_experiences = []
149
+ for position, work_classification in zip(profile.positions, work_experience_results):
150
+ classified_work_experience = ClassifiedWorkExperience(
151
+ position=position, work_experience_classification=work_classification
152
+ )
153
+
154
+ if work_classification.primary_job_type not in {
155
+ work_classification.primary_job_type.INTERNSHIP,
156
+ work_classification.primary_job_type.EXTRACURRICULAR,
157
+ }:
158
+ if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTMENT_BANKING:
159
+ ib_classification = await self._investment_banking_classifier.classify_investment_banking_group(
160
+ profile, position
161
+ )
162
+ classified_work_experience.investment_banking_classification = ib_classification
163
+
164
+ if work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING:
165
+ asset_class_task = self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
166
+ profile, position
167
+ )
168
+ sector_task = self._investing_focus_sector_classifier.classify_investing_focus_sector(
169
+ profile, position
170
+ )
171
+
172
+ asset_class_result, sector_result = await asyncio.gather(asset_class_task, sector_task)
173
+
174
+ classified_work_experience.investing_focus_asset_class_classification = asset_class_result
175
+ classified_work_experience.investing_focus_sector_classification = sector_result
176
+
177
+ classified_work_experiences.append(classified_work_experience)
178
+
179
+ return LinkedinProfileClassificationResults(
180
+ classified_educations=classified_educations, classified_work_experiences=classified_work_experiences
181
+ )
182
 
183
 
184
  async def main() -> None:
185
  """
186
+ Main function to demonstrate the usage of VspDataEnrichment.
187
+
188
+ This function loads a sample LinkedIn profile from a JSON file,
189
+ processes it using the VspDataEnrichment class, and prints the results.
190
  """
191
+ import json
192
+
193
+ # Load a sample LinkedIn profile
194
  with open("tests/test_data/sample_profiles/eric_armagost.json") as f:
195
+ profile_data = json.load(f)
196
+ profile = LinkedinProfile.model_validate(profile_data)
197
+
198
+ # Create an instance of VspDataEnrichment and process the profile
199
+ vsp_enrichment = VspDataEnrichment()
200
+ results = await vsp_enrichment.process_linkedin_profile(profile)
201
+
202
+ # Print the results
203
  print(results.model_dump_json(indent=2))
204
 
205