navkast
commited on
Commit
·
3b993c4
unverified
·
0
Parent(s):
Initial commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +166 -0
- README.md +101 -0
- template/.vscode/settings.json +16 -0
- template/Makefile +103 -0
- template/pyproject.toml +74 -0
- template/src/notebooks/data/hansae_catlett.json +1054 -0
- template/src/notebooks/education_classifier.ipynb +137 -0
- template/src/notebooks/work_experience_classifier.ipynb +160 -0
- template/src/template/__init__.py +0 -0
- template/src/template/app/__init__.py +0 -0
- template/src/template/app/bindings.py +6 -0
- template/src/template/app/education_classifier.py +138 -0
- template/src/template/app/model/linkedin/linkedin_formatters.py +237 -0
- template/src/template/app/model/linkedin/linkedin_models.py +143 -0
- template/src/template/app/prompts/__init__.py +0 -0
- template/src/template/app/prompts/education_classifier/1 - education_classifier_human.txt +9 -0
- template/src/template/app/prompts/education_classifier/1 - education_classifier_system.txt +21 -0
- template/src/template/app/prompts/prompt_loader.py +175 -0
- template/src/template/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt +9 -0
- template/src/template/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt +38 -0
- template/src/template/app/work_experience_classifier.py +174 -0
- template/src/template/llm/__init__.py +0 -0
- template/src/template/llm/bedrock/__init__.py +0 -0
- template/src/template/llm/bedrock/bedrock.py +140 -0
- template/src/template/llm/bedrock/bedrock_model.py +149 -0
- template/src/template/llm/bedrock/bedrock_rate_limiter.py +52 -0
- template/src/template/llm/llm_service.py +48 -0
- template/src/template/llm/openai/__init__.py +0 -0
- template/src/template/llm/openai/openai.py +200 -0
- template/src/template/llm/openai/openai_model.py +74 -0
- template/src/template/llm/openai/openai_rate_limiter.py +193 -0
- template/src/template/llm/openrouter/__init__.py +0 -0
- template/src/template/llm/prompt.py +127 -0
- template/src/template/llm/prompt_chain.py +117 -0
- template/src/template/llm/prompt_template.py +24 -0
- template/src/template/llm/prompt_text.py +75 -0
- template/src/template/shared/__init__.py +0 -0
- template/src/template/shared/aws_clients.py +282 -0
- template/src/template/shared/config.py +93 -0
- template/src/template/shared/config.toml +9 -0
- template/src/template/shared/logger_factory.py +70 -0
- template/tests/__init__.py +0 -0
- template/tests/app/__init__.py +0 -0
- template/tests/app/prompts/__init__.py +0 -0
- template/tests/app/prompts/test_prompt_loader.py +69 -0
- template/tests/app/prompts/test_prompts/basic_test/1 - test_human.txt +1 -0
- template/tests/app/prompts/test_prompts/basic_test/1 - test_system.txt +1 -0
- template/tests/app/prompts/test_prompts/basic_test/2 - test2_human.txt +1 -0
- template/tests/app/test_education_classifier.py +85 -0
- template/tests/app/test_work_experience_classifier.py +105 -0
.gitignore
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 110 |
+
.pdm.toml
|
| 111 |
+
.pdm-python
|
| 112 |
+
.pdm-build/
|
| 113 |
+
|
| 114 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 115 |
+
__pypackages__/
|
| 116 |
+
|
| 117 |
+
# Celery stuff
|
| 118 |
+
celerybeat-schedule
|
| 119 |
+
celerybeat.pid
|
| 120 |
+
|
| 121 |
+
# SageMath parsed files
|
| 122 |
+
*.sage.py
|
| 123 |
+
|
| 124 |
+
# Environments
|
| 125 |
+
.env
|
| 126 |
+
.venv
|
| 127 |
+
env/
|
| 128 |
+
venv/
|
| 129 |
+
ENV/
|
| 130 |
+
env.bak/
|
| 131 |
+
venv.bak/
|
| 132 |
+
|
| 133 |
+
# Spyder project settings
|
| 134 |
+
.spyderproject
|
| 135 |
+
.spyproject
|
| 136 |
+
|
| 137 |
+
# Rope project settings
|
| 138 |
+
.ropeproject
|
| 139 |
+
|
| 140 |
+
# mkdocs documentation
|
| 141 |
+
/site
|
| 142 |
+
|
| 143 |
+
# mypy
|
| 144 |
+
.mypy_cache/
|
| 145 |
+
.dmypy.json
|
| 146 |
+
dmypy.json
|
| 147 |
+
|
| 148 |
+
# Pyre type checker
|
| 149 |
+
.pyre/
|
| 150 |
+
|
| 151 |
+
# pytype static type analyzer
|
| 152 |
+
.pytype/
|
| 153 |
+
|
| 154 |
+
# Cython debug symbols
|
| 155 |
+
cython_debug/
|
| 156 |
+
|
| 157 |
+
# PyCharm
|
| 158 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 159 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 160 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 161 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 162 |
+
#.idea/
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
.claudesync
|
| 166 |
+
.aider*
|
README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Template repository for LLM projects.
|
| 2 |
+
|
| 3 |
+
## Introduction
|
| 4 |
+
|
| 5 |
+
This repository serves as a template for modern Python projects, leveraging the latest features of Python 3.12 and industry-standard tools for development, testing, and packaging.
|
| 6 |
+
|
| 7 |
+
## Key Features
|
| 8 |
+
|
| 9 |
+
- Python 3.12+ compatibility
|
| 10 |
+
- Modern dependency management with `uv`
|
| 11 |
+
- Code formatting with `black` and `isort`
|
| 12 |
+
- Static type checking with `mypy`
|
| 13 |
+
- Testing with `pytest`
|
| 14 |
+
- Build system using `hatchling`
|
| 15 |
+
|
| 16 |
+
## Prerequisites
|
| 17 |
+
|
| 18 |
+
- Python 3.12 or higher
|
| 19 |
+
- `uv` package installer
|
| 20 |
+
|
| 21 |
+
## Installation
|
| 22 |
+
|
| 23 |
+
The only tool you need to install is `uv`. You can install it using `pip`:
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
pip install uv
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
`uv` is a modern Python package installer and resolver that offers several benefits over traditional pip. Benefits of using uv:
|
| 30 |
+
|
| 31 |
+
- Significantly faster installation and resolution of dependencies
|
| 32 |
+
- Built-in support for virtual environments
|
| 33 |
+
- Improved dependency resolution algorithm
|
| 34 |
+
- Written in Rust, offering better performance and memory safety
|
| 35 |
+
|
| 36 |
+
After `uv` is installed, prefix everything with `uv` or `uvx`:
|
| 37 |
+
- `uv pip install` for installing packages.
|
| 38 |
+
- `uvx` for running tools, like `uvx black`
|
| 39 |
+
|
| 40 |
+
## Development Workflow
|
| 41 |
+
|
| 42 |
+
1. Create and activate a virtual environment using `uv`:
|
| 43 |
+
```
|
| 44 |
+
uv venv
|
| 45 |
+
source .venv/bin/activate
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
2. Install the project and its dependencies:
|
| 49 |
+
```
|
| 50 |
+
make
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
### Code Formatting
|
| 55 |
+
|
| 56 |
+
We use `black` and `isort` to maintain consistent code formatting:
|
| 57 |
+
|
| 58 |
+
```
|
| 59 |
+
uvx isort src tests
|
| 60 |
+
uvx black src tests
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### Linting and Type Checking
|
| 64 |
+
|
| 65 |
+
Run static type checking with `mypy`:
|
| 66 |
+
|
| 67 |
+
```
|
| 68 |
+
uv run mypy src
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
### Running Tests
|
| 72 |
+
|
| 73 |
+
Execute the test suite using `pytest`:
|
| 74 |
+
|
| 75 |
+
```
|
| 76 |
+
uv run pytest
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### Building the Project
|
| 80 |
+
|
| 81 |
+
To build the project, use `hatchling`:
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
uvx hatchling build
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## Configuration
|
| 88 |
+
|
| 89 |
+
Project configuration is managed through `pyproject.toml`. This includes settings for `black`, `isort`, `mypy`, and `pytest`.
|
| 90 |
+
|
| 91 |
+
## Makefile Commands
|
| 92 |
+
|
| 93 |
+
For convenience, common tasks are automated in the `Makefile`:
|
| 94 |
+
|
| 95 |
+
- `make install`: Install the project and its dependencies
|
| 96 |
+
- `make format`: Run code formatters
|
| 97 |
+
- `make lint`: Run linters and type checkers
|
| 98 |
+
- `make test`: Run the test suite
|
| 99 |
+
- `make build`: Build the project
|
| 100 |
+
|
| 101 |
+
Run `make help` to see all available commands.
|
template/.vscode/settings.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cSpell.words": [
|
| 3 |
+
"aiohttp",
|
| 4 |
+
"awsrequest",
|
| 5 |
+
"botocore",
|
| 6 |
+
"openai",
|
| 7 |
+
"pydantic"
|
| 8 |
+
],
|
| 9 |
+
"makefile.configureOnOpen": false,
|
| 10 |
+
"python.analysis.typeCheckingMode": "basic",
|
| 11 |
+
"python.testing.pytestArgs": [
|
| 12 |
+
"tests"
|
| 13 |
+
],
|
| 14 |
+
"python.testing.unittestEnabled": false,
|
| 15 |
+
"python.testing.pytestEnabled": true
|
| 16 |
+
}
|
template/Makefile
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Makefile for modern Python 3.12 project management
|
| 2 |
+
# Tools: uv, isort, black, mypy, hatchling, pytest
|
| 3 |
+
|
| 4 |
+
SHELL := /bin/bash
|
| 5 |
+
.SHELLFLAGS := -eu -o pipefail -c
|
| 6 |
+
|
| 7 |
+
# Ensure uv is installed
|
| 8 |
+
ENSURE_UV := which uv > /dev/null || pip install uv
|
| 9 |
+
|
| 10 |
+
# Python version (update as needed)
|
| 11 |
+
PYTHON_VERSION := 3.12
|
| 12 |
+
|
| 13 |
+
# Default target
|
| 14 |
+
all: install format lint build test
|
| 15 |
+
|
| 16 |
+
.PHONY: help
|
| 17 |
+
help: ## Display this help message
|
| 18 |
+
@echo "Usage: make [target]"
|
| 19 |
+
@echo ""
|
| 20 |
+
@echo "Targets:"
|
| 21 |
+
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
|
| 22 |
+
|
| 23 |
+
.PHONY: init
|
| 24 |
+
init: ## Initialize the project environment
|
| 25 |
+
@$(ENSURE_UV)
|
| 26 |
+
@echo "Creating virtual environment..."
|
| 27 |
+
@uv venv
|
| 28 |
+
|
| 29 |
+
.PHONY: install
|
| 30 |
+
install: ## Install project dependencies
|
| 31 |
+
@$(ENSURE_UV)
|
| 32 |
+
@echo "Installing dependencies..."
|
| 33 |
+
@uv pip install -e .[dev]
|
| 34 |
+
|
| 35 |
+
.PHONY: format
|
| 36 |
+
format: ## Format code using isort and black
|
| 37 |
+
@$(ENSURE_UV)
|
| 38 |
+
@echo "Formatting code..."
|
| 39 |
+
@uv pip install isort black
|
| 40 |
+
@uvx isort src tests
|
| 41 |
+
@uvx black src tests
|
| 42 |
+
|
| 43 |
+
.PHONY: lint
|
| 44 |
+
lint: ## Run linters and type-checkers (ruff and mypy)
|
| 45 |
+
@$(ENSURE_UV)
|
| 46 |
+
@echo "Linting and type-checking..."
|
| 47 |
+
@uv pip install ruff mypy
|
| 48 |
+
@uvx ruff format src tests
|
| 49 |
+
@uvx ruff check --fix src tests
|
| 50 |
+
@uv run mypy src
|
| 51 |
+
|
| 52 |
+
.PHONY: test
|
| 53 |
+
test: ## Run tests using pytest
|
| 54 |
+
@$(ENSURE_UV)
|
| 55 |
+
@echo "Running tests..."
|
| 56 |
+
@uv pip install pytest
|
| 57 |
+
@uv run pytest
|
| 58 |
+
|
| 59 |
+
.PHONY: build
|
| 60 |
+
build: ## Build the project using hatchling
|
| 61 |
+
@$(ENSURE_UV)
|
| 62 |
+
@echo "Building project..."
|
| 63 |
+
@uv pip install hatchling
|
| 64 |
+
@uvx hatchling build
|
| 65 |
+
|
| 66 |
+
.PHONY: clean
|
| 67 |
+
clean: ## Clean up build artifacts and cache
|
| 68 |
+
@echo "Cleaning up..."
|
| 69 |
+
@rm -rf build dist .eggs *.egg-info
|
| 70 |
+
@find . -type d -name '__pycache__' -exec rm -rf {} +
|
| 71 |
+
@find . -type f -name '*.pyc' -exec rm -f {} +
|
| 72 |
+
@find . -type f -name '*.pyo' -exec rm -f {} +
|
| 73 |
+
@find . -type f -name '*~' -exec rm -f {} +
|
| 74 |
+
|
| 75 |
+
.PHONY: all
|
| 76 |
+
all: install format lint build test ## Run all main tasks (install, format, lint, build, test)
|
| 77 |
+
|
| 78 |
+
# AWS SSO configuration
|
| 79 |
+
AWS_PROFILE_NAME := Geometric-PowerUserAccess
|
| 80 |
+
AWS_SSO_START_URL := https://geometric.awsapps.com/start
|
| 81 |
+
AWS_SSO_REGION := us-east-1
|
| 82 |
+
AWS_ACCOUNT_ID := 339713096219
|
| 83 |
+
AWS_ROLE_NAME := PowerUserAccess
|
| 84 |
+
AWS_REGION := us-east-1
|
| 85 |
+
|
| 86 |
+
.PHONY: configure-aws-sso
|
| 87 |
+
configure-aws-sso: ## Configure AWS SSO
|
| 88 |
+
@echo "Configuring AWS SSO..."
|
| 89 |
+
@mkdir -p ~/.aws
|
| 90 |
+
@echo "# AWS SSO Configuration" > ~/.aws/config
|
| 91 |
+
@echo "" >> ~/.aws/config
|
| 92 |
+
@echo "[profile $(AWS_PROFILE_NAME)]" >> ~/.aws/config
|
| 93 |
+
@echo "sso_session = geometric" >> ~/.aws/config
|
| 94 |
+
@echo "sso_account_id = $(AWS_ACCOUNT_ID)" >> ~/.aws/config
|
| 95 |
+
@echo "sso_role_name = $(AWS_ROLE_NAME)" >> ~/.aws/config
|
| 96 |
+
@echo "region = $(AWS_REGION)" >> ~/.aws/config
|
| 97 |
+
@echo "" >> ~/.aws/config
|
| 98 |
+
@echo "[sso-session geometric]" >> ~/.aws/config
|
| 99 |
+
@echo "sso_start_url = $(AWS_SSO_START_URL)" >> ~/.aws/config
|
| 100 |
+
@echo "sso_region = $(AWS_SSO_REGION)" >> ~/.aws/config
|
| 101 |
+
@echo "sso_registration_scopes = sso:account:access" >> ~/.aws/config
|
| 102 |
+
@echo "AWS SSO configuration complete."
|
| 103 |
+
@aws sso login --profile $(AWS_PROFILE_NAME)
|
template/pyproject.toml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "template"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "A template project for modern Python development"
|
| 5 |
+
requires-python = ">=3.12"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"aiohttp>=3.10.5",
|
| 8 |
+
"boto3>=1.35.12",
|
| 9 |
+
"ipykernel>=6.29.5",
|
| 10 |
+
"openai>=1.43.0",
|
| 11 |
+
"pydantic>=2.8.2",
|
| 12 |
+
"pytimeparse>=1.1.8",
|
| 13 |
+
"requests>=2.32.3",
|
| 14 |
+
"structlog>=24.4.0",
|
| 15 |
+
"tenacity>=9.0.0",
|
| 16 |
+
"tomli>=2.0.1",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
[project.optional-dependencies]
|
| 20 |
+
dev = [
|
| 21 |
+
"ruff>=0.2.0",
|
| 22 |
+
"black>=23.12.0",
|
| 23 |
+
"isort>=5.13.0",
|
| 24 |
+
"mypy>=1.8.0",
|
| 25 |
+
"pytest>=7.4.0",
|
| 26 |
+
"pytest-asyncio>=0.21.0",
|
| 27 |
+
"mypy-boto3-bedrock-runtime>=1.35.9",
|
| 28 |
+
"mypy-boto3-dynamodb>=1.35.0",
|
| 29 |
+
"mypy-boto3-s3>=1.35.2",
|
| 30 |
+
"mypy-boto3-ssm>=1.35.0",
|
| 31 |
+
"types-requests>=2.32.0.20240905",
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
[tool.hatch.build.targets.wheel]
|
| 35 |
+
packages = ["src/template"]
|
| 36 |
+
|
| 37 |
+
[tool.ruff]
|
| 38 |
+
lint.select = ["E", "F", "I", "N"]
|
| 39 |
+
lint.ignore = []
|
| 40 |
+
line-length = 120
|
| 41 |
+
target-version = "py312"
|
| 42 |
+
|
| 43 |
+
[tool.black]
|
| 44 |
+
line-length = 120
|
| 45 |
+
target-version = ["py312"]
|
| 46 |
+
|
| 47 |
+
[tool.isort]
|
| 48 |
+
profile = "black"
|
| 49 |
+
line_length = 120
|
| 50 |
+
|
| 51 |
+
[tool.mypy]
|
| 52 |
+
python_version = "3.12"
|
| 53 |
+
strict = true
|
| 54 |
+
ignore_missing_imports = true
|
| 55 |
+
mypy_path = "src"
|
| 56 |
+
namespace_packages = true
|
| 57 |
+
explicit_package_bases = true
|
| 58 |
+
enable_incomplete_feature = ["NewGenericSyntax"]
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
[build-system]
|
| 62 |
+
requires = ["hatchling"]
|
| 63 |
+
build-backend = "hatchling.build"
|
| 64 |
+
|
| 65 |
+
[tool.pytest.ini_options]
|
| 66 |
+
addopts = ["--import-mode=importlib"]
|
| 67 |
+
pythonpath = [".", "src"]
|
| 68 |
+
testpaths = ["tests"]
|
| 69 |
+
log_cli = 1
|
| 70 |
+
asyncio_default_fixture_loop_scope = "function"
|
| 71 |
+
|
| 72 |
+
[tool.uv]
|
| 73 |
+
link-mode = "copy"
|
| 74 |
+
python-preference = "only-managed"
|
template/src/notebooks/data/hansae_catlett.json
ADDED
|
@@ -0,0 +1,1054 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"urn": "ACoAAAScLaoBojlZZh9af6GI5G566vDOTj50KOw",
|
| 3 |
+
"username": "hansae-catlett-436a9b21",
|
| 4 |
+
"firstName": "Hansae",
|
| 5 |
+
"lastName": "Catlett",
|
| 6 |
+
"isCreator": false,
|
| 7 |
+
"isOpenToWork": false,
|
| 8 |
+
"isHiring": false,
|
| 9 |
+
"profilePicture": "https://media.licdn.com/dms/image/v2/D5603AQEVfJQVTKPvYw/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1710971553153?e=1730332800&v=beta&t=D40AmtyFPi_BuJ2JPycWFbv0jSQhf3IqrVnf22BxSZA",
|
| 10 |
+
"backgroundImage": null,
|
| 11 |
+
"summary": "",
|
| 12 |
+
"headline": "Partner at HOF Capital | Co-founder & GP at The MBA Fund",
|
| 13 |
+
"geo": {
|
| 14 |
+
"country": "United States",
|
| 15 |
+
"city": "New York, New York",
|
| 16 |
+
"full": "New York, New York, United States"
|
| 17 |
+
},
|
| 18 |
+
"languages": null,
|
| 19 |
+
"educations": [
|
| 20 |
+
{
|
| 21 |
+
"start": {
|
| 22 |
+
"year": 2016,
|
| 23 |
+
"month": 0,
|
| 24 |
+
"day": 0
|
| 25 |
+
},
|
| 26 |
+
"end": {
|
| 27 |
+
"year": 2019,
|
| 28 |
+
"month": 0,
|
| 29 |
+
"day": 0
|
| 30 |
+
},
|
| 31 |
+
"fieldOfStudy": "",
|
| 32 |
+
"degree": "Master of Business Administration (M.B.A.)",
|
| 33 |
+
"grade": "",
|
| 34 |
+
"schoolName": "Stanford University Graduate School of Business",
|
| 35 |
+
"description": "",
|
| 36 |
+
"activities": "Ernest C. Arbuckle Award Finalist; Miller Social Change Leadership Award Winner; Arbuckle Leadership Fellow; Venture Capital Club; GSB Impact Fund; Co-President Social Innovation Club; Co-President Government & Politics Club; LOWkeynote (main stage featured speaker); TALK Coach; GSB Show",
|
| 37 |
+
"url": "https://www.linkedin.com/school/stanford-graduate-school-of-business/",
|
| 38 |
+
"schoolId": "1791"
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"start": {
|
| 42 |
+
"year": 2016,
|
| 43 |
+
"month": 0,
|
| 44 |
+
"day": 0
|
| 45 |
+
},
|
| 46 |
+
"end": {
|
| 47 |
+
"year": 2019,
|
| 48 |
+
"month": 0,
|
| 49 |
+
"day": 0
|
| 50 |
+
},
|
| 51 |
+
"fieldOfStudy": "",
|
| 52 |
+
"degree": "Master of Public Policy (M.P.P.)",
|
| 53 |
+
"grade": "",
|
| 54 |
+
"schoolName": "Stanford University",
|
| 55 |
+
"description": "Led CalFresh/SNAP (food stamps) policy & impact analysis with California State Senator",
|
| 56 |
+
"activities": "",
|
| 57 |
+
"url": "https://www.linkedin.com/school/stanford-university/",
|
| 58 |
+
"schoolId": "1792"
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"start": {
|
| 62 |
+
"year": 2007,
|
| 63 |
+
"month": 0,
|
| 64 |
+
"day": 0
|
| 65 |
+
},
|
| 66 |
+
"end": {
|
| 67 |
+
"year": 2011,
|
| 68 |
+
"month": 0,
|
| 69 |
+
"day": 0
|
| 70 |
+
},
|
| 71 |
+
"fieldOfStudy": "Biomedical Engineering, Philosophy",
|
| 72 |
+
"degree": "Bachelor of Arts",
|
| 73 |
+
"grade": "",
|
| 74 |
+
"schoolName": "Harvard University",
|
| 75 |
+
"description": "Graduated with Honors",
|
| 76 |
+
"activities": "Club Basketball, College Bound Mentoring, Veritas Financial Group, Rugby, Crew",
|
| 77 |
+
"url": "https://www.linkedin.com/school/harvard-university/",
|
| 78 |
+
"schoolId": "1646"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"start": {
|
| 82 |
+
"year": 0,
|
| 83 |
+
"month": 0,
|
| 84 |
+
"day": 0
|
| 85 |
+
},
|
| 86 |
+
"end": {
|
| 87 |
+
"year": 0,
|
| 88 |
+
"month": 0,
|
| 89 |
+
"day": 0
|
| 90 |
+
},
|
| 91 |
+
"fieldOfStudy": "Engineering, Philosophy, Finance",
|
| 92 |
+
"degree": "Study Abroad",
|
| 93 |
+
"grade": "",
|
| 94 |
+
"schoolName": "University of New South Wales",
|
| 95 |
+
"description": "",
|
| 96 |
+
"activities": "Premier League Basketball Team",
|
| 97 |
+
"url": "https://www.linkedin.com/school/unsw/",
|
| 98 |
+
"schoolId": "6096"
|
| 99 |
+
}
|
| 100 |
+
],
|
| 101 |
+
"position": [
|
| 102 |
+
{
|
| 103 |
+
"companyId": 9217965,
|
| 104 |
+
"companyName": "HOF Capital",
|
| 105 |
+
"companyUsername": "hof-capital",
|
| 106 |
+
"companyURL": "https://www.linkedin.com/company/hof-capital/",
|
| 107 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQFAy04TxfOQ1w/company-logo_400_400/company-logo_400_400/0/1631336133602?e=1733356800&v=beta&t=XT3nnr1M-X8_dEx8Wp2wMOLyu6NXc_9c1CFa1xdu-dY",
|
| 108 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 109 |
+
"companyStaffCountRange": "11 - 50",
|
| 110 |
+
"title": "Partner",
|
| 111 |
+
"multiLocaleTitle": {
|
| 112 |
+
"en_US": "Partner"
|
| 113 |
+
},
|
| 114 |
+
"multiLocaleCompanyName": {
|
| 115 |
+
"en_US": "HOF Capital"
|
| 116 |
+
},
|
| 117 |
+
"location": "New York City Metropolitan Area",
|
| 118 |
+
"description": "",
|
| 119 |
+
"employmentType": "",
|
| 120 |
+
"start": {
|
| 121 |
+
"year": 2024,
|
| 122 |
+
"month": 3,
|
| 123 |
+
"day": 0
|
| 124 |
+
},
|
| 125 |
+
"end": {
|
| 126 |
+
"year": 0,
|
| 127 |
+
"month": 0,
|
| 128 |
+
"day": 0
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"companyId": 33433434,
|
| 133 |
+
"companyName": "The MBA Fund",
|
| 134 |
+
"companyUsername": "thembafund",
|
| 135 |
+
"companyURL": "https://www.linkedin.com/company/thembafund/",
|
| 136 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHh7Q_nr-3JiA/company-logo_400_400/company-logo_400_400/0/1630572463370/thembafund_logo?e=1733356800&v=beta&t=BJUl211JX6J1xxnvUU4Hy0pNd8lIKesF5XpHEdiJP6M",
|
| 137 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 138 |
+
"companyStaffCountRange": "11 - 50",
|
| 139 |
+
"title": "Co-Founder, General Partner",
|
| 140 |
+
"multiLocaleTitle": {
|
| 141 |
+
"en_US": "Co-Founder, General Partner"
|
| 142 |
+
},
|
| 143 |
+
"multiLocaleCompanyName": {
|
| 144 |
+
"en_US": "The MBA Fund"
|
| 145 |
+
},
|
| 146 |
+
"location": "San Francisco Bay Area",
|
| 147 |
+
"description": "The MBA Fund is a venture capital firm that helps founders from the top startup-producing universities create legendary companies. We most frequently back student, alumni and dropout founders from Harvard, Stanford, and Wharton (expanding presence nationally!). \n\nWe\u2019re a community of investors and operators who aren\u2019t afraid to take the riskiest leaps with founders who are just getting started. Our backers include top-tier VC firms, repeat founders, experienced execs, and super angels with ties to the communities where we invest. This network is ready and willing to help founders in our portfolio with financing, hiring, customer growth and more.\n\nSelect investments include:\n* Jeeves ($2B)\n* Dandy ($1B)\n* TRM Labs ($600M+)\n* Hightouch ($450M+)\n* Luxury Presence ($200M+)\n* FlutterFlow ($150M+)\n* Legacy ($150M+)\n* Seso ($150M+)\n* Peachy ($100M+)",
|
| 148 |
+
"employmentType": "",
|
| 149 |
+
"start": {
|
| 150 |
+
"year": 2018,
|
| 151 |
+
"month": 0,
|
| 152 |
+
"day": 0
|
| 153 |
+
},
|
| 154 |
+
"end": {
|
| 155 |
+
"year": 0,
|
| 156 |
+
"month": 0,
|
| 157 |
+
"day": 0
|
| 158 |
+
}
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"companyId": 16754,
|
| 162 |
+
"companyName": "Bessemer Venture Partners",
|
| 163 |
+
"companyUsername": "bessemer-venture-partners",
|
| 164 |
+
"companyURL": "https://www.linkedin.com/company/bessemer-venture-partners/",
|
| 165 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQF2-_rRIldZpw/company-logo_400_400/company-logo_400_400/0/1719862419823/bessemer_venture_partners_logo?e=1733356800&v=beta&t=rX0N0rY1b8HL1G1_MpwJi16BhGWtBC-dyE2cBFi644A",
|
| 166 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 167 |
+
"companyStaffCountRange": "11 - 50",
|
| 168 |
+
"title": "Vice President",
|
| 169 |
+
"multiLocaleTitle": {
|
| 170 |
+
"en_US": "Vice President"
|
| 171 |
+
},
|
| 172 |
+
"multiLocaleCompanyName": {
|
| 173 |
+
"en_US": "Bessemer Venture Partners"
|
| 174 |
+
},
|
| 175 |
+
"location": "New York City Metropolitan Area",
|
| 176 |
+
"description": "",
|
| 177 |
+
"employmentType": "Full-time",
|
| 178 |
+
"start": {
|
| 179 |
+
"year": 2019,
|
| 180 |
+
"month": 7,
|
| 181 |
+
"day": 0
|
| 182 |
+
},
|
| 183 |
+
"end": {
|
| 184 |
+
"year": 2024,
|
| 185 |
+
"month": 2,
|
| 186 |
+
"day": 0
|
| 187 |
+
}
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"companyId": 11745039,
|
| 191 |
+
"companyName": "Rillavoice",
|
| 192 |
+
"companyUsername": "rilla-co",
|
| 193 |
+
"companyURL": "https://www.linkedin.com/company/rilla-co/",
|
| 194 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQGckOgzxr2p4g/company-logo_400_400/company-logo_400_400/0/1709081270062/rilla_co_logo?e=1733356800&v=beta&t=MvfFWJNVqsB3jaJxROLxP896fz5akuU8pndekbJ3C5g",
|
| 195 |
+
"companyIndustry": "Computer Software",
|
| 196 |
+
"companyStaffCountRange": "11 - 50",
|
| 197 |
+
"title": "Board Observer",
|
| 198 |
+
"multiLocaleTitle": {
|
| 199 |
+
"en_US": "Board Observer"
|
| 200 |
+
},
|
| 201 |
+
"multiLocaleCompanyName": {
|
| 202 |
+
"en_US": "Rillavoice"
|
| 203 |
+
},
|
| 204 |
+
"location": "New York City Metropolitan Area",
|
| 205 |
+
"description": "",
|
| 206 |
+
"employmentType": "",
|
| 207 |
+
"start": {
|
| 208 |
+
"year": 2023,
|
| 209 |
+
"month": 0,
|
| 210 |
+
"day": 0
|
| 211 |
+
},
|
| 212 |
+
"end": {
|
| 213 |
+
"year": 2024,
|
| 214 |
+
"month": 0,
|
| 215 |
+
"day": 0
|
| 216 |
+
}
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"companyId": 74099951,
|
| 220 |
+
"companyName": "Archy",
|
| 221 |
+
"companyUsername": "archydental",
|
| 222 |
+
"companyURL": "https://www.linkedin.com/company/archydental/",
|
| 223 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQG13A6MTBOt5w/company-logo_400_400/company-logo_400_400/0/1724949981185/archydental_logo?e=1733356800&v=beta&t=prnsqQ4vb9F4cnt5_4Cmc6LHXYKt1ceIU6Z_G4AlzCw",
|
| 224 |
+
"companyIndustry": "Computer Software",
|
| 225 |
+
"companyStaffCountRange": "11 - 50",
|
| 226 |
+
"title": "Investor",
|
| 227 |
+
"multiLocaleTitle": {
|
| 228 |
+
"en_US": "Investor"
|
| 229 |
+
},
|
| 230 |
+
"multiLocaleCompanyName": {
|
| 231 |
+
"en_US": "Archy"
|
| 232 |
+
},
|
| 233 |
+
"location": "San Francisco Bay Area",
|
| 234 |
+
"description": "",
|
| 235 |
+
"employmentType": "",
|
| 236 |
+
"start": {
|
| 237 |
+
"year": 2022,
|
| 238 |
+
"month": 0,
|
| 239 |
+
"day": 0
|
| 240 |
+
},
|
| 241 |
+
"end": {
|
| 242 |
+
"year": 2024,
|
| 243 |
+
"month": 0,
|
| 244 |
+
"day": 0
|
| 245 |
+
}
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"companyId": 18762446,
|
| 249 |
+
"companyName": "MaintainX",
|
| 250 |
+
"companyUsername": "getmaintainx",
|
| 251 |
+
"companyURL": "https://www.linkedin.com/company/getmaintainx/",
|
| 252 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQEOOm1IB8UHxQ/company-logo_400_400/company-logo_400_400/0/1719257689258/getmaintainx_logo?e=1733356800&v=beta&t=bQzUQXAg00rNpk8-C34bicPL4Dnxwbyl-pp-Z47Ydr8",
|
| 253 |
+
"companyIndustry": "Computer Software",
|
| 254 |
+
"companyStaffCountRange": "201 - 500",
|
| 255 |
+
"title": "Board Observer",
|
| 256 |
+
"multiLocaleTitle": {
|
| 257 |
+
"en_US": "Board Observer"
|
| 258 |
+
},
|
| 259 |
+
"multiLocaleCompanyName": {
|
| 260 |
+
"en_US": "MaintainX"
|
| 261 |
+
},
|
| 262 |
+
"location": "San Francisco Bay Area",
|
| 263 |
+
"description": "",
|
| 264 |
+
"employmentType": "",
|
| 265 |
+
"start": {
|
| 266 |
+
"year": 2021,
|
| 267 |
+
"month": 0,
|
| 268 |
+
"day": 0
|
| 269 |
+
},
|
| 270 |
+
"end": {
|
| 271 |
+
"year": 2024,
|
| 272 |
+
"month": 0,
|
| 273 |
+
"day": 0
|
| 274 |
+
}
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"companyId": 18927115,
|
| 278 |
+
"companyName": "Spot AI",
|
| 279 |
+
"companyUsername": "spot-ai",
|
| 280 |
+
"companyURL": "https://www.linkedin.com/company/spot-ai/",
|
| 281 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQG2LOnQKbIi4A/company-logo_400_400/company-logo_400_400/0/1630635635761/spot_ai_logo?e=1733356800&v=beta&t=eJx_hAO6SMYTUTnHrmE4-7bz37xkTh1rzjh0CqRBwCs",
|
| 282 |
+
"companyIndustry": "Computer Software",
|
| 283 |
+
"companyStaffCountRange": "51 - 200",
|
| 284 |
+
"title": "Investor",
|
| 285 |
+
"multiLocaleTitle": {
|
| 286 |
+
"en_US": "Investor"
|
| 287 |
+
},
|
| 288 |
+
"multiLocaleCompanyName": {
|
| 289 |
+
"en_US": "Spot AI"
|
| 290 |
+
},
|
| 291 |
+
"location": "San Francisco Bay Area",
|
| 292 |
+
"description": "",
|
| 293 |
+
"employmentType": "",
|
| 294 |
+
"start": {
|
| 295 |
+
"year": 2021,
|
| 296 |
+
"month": 0,
|
| 297 |
+
"day": 0
|
| 298 |
+
},
|
| 299 |
+
"end": {
|
| 300 |
+
"year": 2024,
|
| 301 |
+
"month": 0,
|
| 302 |
+
"day": 0
|
| 303 |
+
}
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"companyId": 31404660,
|
| 307 |
+
"companyName": "VendorPM",
|
| 308 |
+
"companyUsername": "vendorpm",
|
| 309 |
+
"companyURL": "https://www.linkedin.com/company/vendorpm/",
|
| 310 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQEglOi17VDAUg/company-logo_400_400/company-logo_400_400/0/1688751388502/vendorpm_logo?e=1733356800&v=beta&t=g36lpENHyHRFusF-KOJfJvh97fsAezDC1FAqYzG7zUg",
|
| 311 |
+
"companyIndustry": "Computer Software",
|
| 312 |
+
"companyStaffCountRange": "11 - 50",
|
| 313 |
+
"title": "Board Observer",
|
| 314 |
+
"multiLocaleTitle": {
|
| 315 |
+
"en_US": "Board Observer"
|
| 316 |
+
},
|
| 317 |
+
"multiLocaleCompanyName": {
|
| 318 |
+
"en_US": "VendorPM"
|
| 319 |
+
},
|
| 320 |
+
"location": "Toronto, Ontario, Canada",
|
| 321 |
+
"description": "",
|
| 322 |
+
"employmentType": "",
|
| 323 |
+
"start": {
|
| 324 |
+
"year": 2021,
|
| 325 |
+
"month": 0,
|
| 326 |
+
"day": 0
|
| 327 |
+
},
|
| 328 |
+
"end": {
|
| 329 |
+
"year": 2024,
|
| 330 |
+
"month": 0,
|
| 331 |
+
"day": 0
|
| 332 |
+
}
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"companyId": 18872387,
|
| 336 |
+
"companyName": "Luxury Presence",
|
| 337 |
+
"companyUsername": "luxurypresence",
|
| 338 |
+
"companyURL": "https://www.linkedin.com/company/luxurypresence/",
|
| 339 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQEv9eTo6golbw/company-logo_400_400/company-logo_400_400/0/1655397407777/luxurypresence_logo?e=1733356800&v=beta&t=SKKunif574CP6v1Z2LjF1sk6ZM254eu5VWN7ESZ1ylc",
|
| 340 |
+
"companyIndustry": "Computer Software",
|
| 341 |
+
"companyStaffCountRange": "501 - 1000",
|
| 342 |
+
"title": "Board Director",
|
| 343 |
+
"multiLocaleTitle": {
|
| 344 |
+
"en_US": "Board Director"
|
| 345 |
+
},
|
| 346 |
+
"multiLocaleCompanyName": {
|
| 347 |
+
"en_US": "Luxury Presence"
|
| 348 |
+
},
|
| 349 |
+
"location": "Los Angeles Metropolitan Area",
|
| 350 |
+
"description": "",
|
| 351 |
+
"employmentType": "",
|
| 352 |
+
"start": {
|
| 353 |
+
"year": 2021,
|
| 354 |
+
"month": 0,
|
| 355 |
+
"day": 0
|
| 356 |
+
},
|
| 357 |
+
"end": {
|
| 358 |
+
"year": 2024,
|
| 359 |
+
"month": 0,
|
| 360 |
+
"day": 0
|
| 361 |
+
}
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"companyId": 3157549,
|
| 365 |
+
"companyName": "ServiceTitan",
|
| 366 |
+
"companyUsername": "servicetitan",
|
| 367 |
+
"companyURL": "https://www.linkedin.com/company/servicetitan/",
|
| 368 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQEZXBsypK75Fg/company-logo_400_400/company-logo_400_400/0/1660079842958/servicetitan_logo?e=1733356800&v=beta&t=Ly_gt1iDc0qfGCBWgwrUHqnamNy7TZMPEDARe-tlJVA",
|
| 369 |
+
"companyIndustry": "Computer Software",
|
| 370 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 371 |
+
"title": "Investor",
|
| 372 |
+
"multiLocaleTitle": {
|
| 373 |
+
"en_US": "Investor"
|
| 374 |
+
},
|
| 375 |
+
"multiLocaleCompanyName": {
|
| 376 |
+
"en_US": "ServiceTitan"
|
| 377 |
+
},
|
| 378 |
+
"location": "Los Angeles Metropolitan Area",
|
| 379 |
+
"description": "",
|
| 380 |
+
"employmentType": "",
|
| 381 |
+
"start": {
|
| 382 |
+
"year": 2021,
|
| 383 |
+
"month": 0,
|
| 384 |
+
"day": 0
|
| 385 |
+
},
|
| 386 |
+
"end": {
|
| 387 |
+
"year": 2024,
|
| 388 |
+
"month": 0,
|
| 389 |
+
"day": 0
|
| 390 |
+
}
|
| 391 |
+
}
|
| 392 |
+
],
|
| 393 |
+
"fullPositions": [
|
| 394 |
+
{
|
| 395 |
+
"companyId": 9217965,
|
| 396 |
+
"companyName": "HOF Capital",
|
| 397 |
+
"companyUsername": "hof-capital",
|
| 398 |
+
"companyURL": "https://www.linkedin.com/company/hof-capital",
|
| 399 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQFAy04TxfOQ1w/company-logo_400_400/company-logo_400_400/0/1631336133602?e=1733356800&v=beta&t=XT3nnr1M-X8_dEx8Wp2wMOLyu6NXc_9c1CFa1xdu-dY",
|
| 400 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 401 |
+
"companyStaffCountRange": "11 - 50",
|
| 402 |
+
"title": "Partner",
|
| 403 |
+
"multiLocaleTitle": null,
|
| 404 |
+
"multiLocaleCompanyName": null,
|
| 405 |
+
"location": "New York City Metropolitan Area",
|
| 406 |
+
"description": "",
|
| 407 |
+
"employmentType": "",
|
| 408 |
+
"start": {
|
| 409 |
+
"year": 2024,
|
| 410 |
+
"month": 3,
|
| 411 |
+
"day": 0
|
| 412 |
+
},
|
| 413 |
+
"end": {
|
| 414 |
+
"year": 0,
|
| 415 |
+
"month": 0,
|
| 416 |
+
"day": 0
|
| 417 |
+
}
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"companyId": 33433434,
|
| 421 |
+
"companyName": "The MBA Fund",
|
| 422 |
+
"companyUsername": "thembafund",
|
| 423 |
+
"companyURL": "https://www.linkedin.com/company/thembafund",
|
| 424 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQHh7Q_nr-3JiA/company-logo_400_400/company-logo_400_400/0/1630572463370/thembafund_logo?e=1733356800&v=beta&t=BJUl211JX6J1xxnvUU4Hy0pNd8lIKesF5XpHEdiJP6M",
|
| 425 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 426 |
+
"companyStaffCountRange": "11 - 50",
|
| 427 |
+
"title": "Co-Founder, General Partner",
|
| 428 |
+
"multiLocaleTitle": null,
|
| 429 |
+
"multiLocaleCompanyName": null,
|
| 430 |
+
"location": "San Francisco Bay Area",
|
| 431 |
+
"description": "The MBA Fund is a venture capital firm that helps founders from the top startup-producing universities create legendary companies. We most frequently back student, alumni and dropout founders from Harvard, Stanford, and Wharton (expanding presence nationally!). \n\nWe\u2019re a community of investors and operators who aren\u2019t afraid to take the riskiest leaps with founders who are just getting started. Our backers include top-tier VC firms, repeat founders, experienced execs, and super angels with ties to the communities where we invest. This network is ready and willing to help founders in our portfolio with financing, hiring, customer growth and more.\n\nSelect investments include:\n* Jeeves ($2B)\n* Dandy ($1B)\n* TRM Labs ($600M+)\n* Hightouch ($450M+)\n* Luxury Presence ($200M+)\n* FlutterFlow ($150M+)\n* Legacy ($150M+)\n* Seso ($150M+)\n* Peachy ($100M+)",
|
| 432 |
+
"employmentType": "",
|
| 433 |
+
"start": {
|
| 434 |
+
"year": 2018,
|
| 435 |
+
"month": 0,
|
| 436 |
+
"day": 0
|
| 437 |
+
},
|
| 438 |
+
"end": {
|
| 439 |
+
"year": 0,
|
| 440 |
+
"month": 0,
|
| 441 |
+
"day": 0
|
| 442 |
+
}
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"companyId": 16754,
|
| 446 |
+
"companyName": "Bessemer Venture Partners",
|
| 447 |
+
"companyUsername": "bessemer-venture-partners",
|
| 448 |
+
"companyURL": "https://www.linkedin.com/company/bessemer-venture-partners",
|
| 449 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQF2-_rRIldZpw/company-logo_400_400/company-logo_400_400/0/1719862419823/bessemer_venture_partners_logo?e=1733356800&v=beta&t=rX0N0rY1b8HL1G1_MpwJi16BhGWtBC-dyE2cBFi644A",
|
| 450 |
+
"companyIndustry": "Venture Capital & Private Equity",
|
| 451 |
+
"companyStaffCountRange": "11 - 50",
|
| 452 |
+
"title": "Vice President",
|
| 453 |
+
"multiLocaleTitle": null,
|
| 454 |
+
"multiLocaleCompanyName": null,
|
| 455 |
+
"location": "New York City Metropolitan Area",
|
| 456 |
+
"description": "",
|
| 457 |
+
"employmentType": "",
|
| 458 |
+
"start": {
|
| 459 |
+
"year": 2019,
|
| 460 |
+
"month": 7,
|
| 461 |
+
"day": 0
|
| 462 |
+
},
|
| 463 |
+
"end": {
|
| 464 |
+
"year": 2024,
|
| 465 |
+
"month": 2,
|
| 466 |
+
"day": 0
|
| 467 |
+
}
|
| 468 |
+
},
|
| 469 |
+
{
|
| 470 |
+
"companyId": 11745039,
|
| 471 |
+
"companyName": "Rillavoice",
|
| 472 |
+
"companyUsername": "rilla-co",
|
| 473 |
+
"companyURL": "https://www.linkedin.com/company/rilla-co",
|
| 474 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQGckOgzxr2p4g/company-logo_400_400/company-logo_400_400/0/1709081270062/rilla_co_logo?e=1733356800&v=beta&t=MvfFWJNVqsB3jaJxROLxP896fz5akuU8pndekbJ3C5g",
|
| 475 |
+
"companyIndustry": "Computer Software",
|
| 476 |
+
"companyStaffCountRange": "11 - 50",
|
| 477 |
+
"title": "Board Observer",
|
| 478 |
+
"multiLocaleTitle": null,
|
| 479 |
+
"multiLocaleCompanyName": null,
|
| 480 |
+
"location": "New York City Metropolitan Area",
|
| 481 |
+
"description": "",
|
| 482 |
+
"employmentType": "",
|
| 483 |
+
"start": {
|
| 484 |
+
"year": 2023,
|
| 485 |
+
"month": 0,
|
| 486 |
+
"day": 0
|
| 487 |
+
},
|
| 488 |
+
"end": {
|
| 489 |
+
"year": 2024,
|
| 490 |
+
"month": 0,
|
| 491 |
+
"day": 0
|
| 492 |
+
}
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"companyId": 74099951,
|
| 496 |
+
"companyName": "Archy",
|
| 497 |
+
"companyUsername": "archydental",
|
| 498 |
+
"companyURL": "https://www.linkedin.com/company/archydental",
|
| 499 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4E0BAQG13A6MTBOt5w/company-logo_400_400/company-logo_400_400/0/1724949981185/archydental_logo?e=1733356800&v=beta&t=prnsqQ4vb9F4cnt5_4Cmc6LHXYKt1ceIU6Z_G4AlzCw",
|
| 500 |
+
"companyIndustry": "Computer Software",
|
| 501 |
+
"companyStaffCountRange": "11 - 50",
|
| 502 |
+
"title": "Investor",
|
| 503 |
+
"multiLocaleTitle": null,
|
| 504 |
+
"multiLocaleCompanyName": null,
|
| 505 |
+
"location": "San Francisco Bay Area",
|
| 506 |
+
"description": "",
|
| 507 |
+
"employmentType": "",
|
| 508 |
+
"start": {
|
| 509 |
+
"year": 2022,
|
| 510 |
+
"month": 0,
|
| 511 |
+
"day": 0
|
| 512 |
+
},
|
| 513 |
+
"end": {
|
| 514 |
+
"year": 2024,
|
| 515 |
+
"month": 0,
|
| 516 |
+
"day": 0
|
| 517 |
+
}
|
| 518 |
+
},
|
| 519 |
+
{
|
| 520 |
+
"companyId": 18762446,
|
| 521 |
+
"companyName": "MaintainX",
|
| 522 |
+
"companyUsername": "getmaintainx",
|
| 523 |
+
"companyURL": "https://www.linkedin.com/company/getmaintainx",
|
| 524 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQEOOm1IB8UHxQ/company-logo_400_400/company-logo_400_400/0/1719257689258/getmaintainx_logo?e=1733356800&v=beta&t=bQzUQXAg00rNpk8-C34bicPL4Dnxwbyl-pp-Z47Ydr8",
|
| 525 |
+
"companyIndustry": "Computer Software",
|
| 526 |
+
"companyStaffCountRange": "201 - 500",
|
| 527 |
+
"title": "Board Observer",
|
| 528 |
+
"multiLocaleTitle": null,
|
| 529 |
+
"multiLocaleCompanyName": null,
|
| 530 |
+
"location": "San Francisco Bay Area",
|
| 531 |
+
"description": "",
|
| 532 |
+
"employmentType": "",
|
| 533 |
+
"start": {
|
| 534 |
+
"year": 2021,
|
| 535 |
+
"month": 0,
|
| 536 |
+
"day": 0
|
| 537 |
+
},
|
| 538 |
+
"end": {
|
| 539 |
+
"year": 2024,
|
| 540 |
+
"month": 0,
|
| 541 |
+
"day": 0
|
| 542 |
+
}
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"companyId": 18927115,
|
| 546 |
+
"companyName": "Spot AI",
|
| 547 |
+
"companyUsername": "spot-ai",
|
| 548 |
+
"companyURL": "https://www.linkedin.com/company/spot-ai",
|
| 549 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQG2LOnQKbIi4A/company-logo_400_400/company-logo_400_400/0/1630635635761/spot_ai_logo?e=1733356800&v=beta&t=eJx_hAO6SMYTUTnHrmE4-7bz37xkTh1rzjh0CqRBwCs",
|
| 550 |
+
"companyIndustry": "Computer Software",
|
| 551 |
+
"companyStaffCountRange": "51 - 200",
|
| 552 |
+
"title": "Investor",
|
| 553 |
+
"multiLocaleTitle": null,
|
| 554 |
+
"multiLocaleCompanyName": null,
|
| 555 |
+
"location": "San Francisco Bay Area",
|
| 556 |
+
"description": "",
|
| 557 |
+
"employmentType": "",
|
| 558 |
+
"start": {
|
| 559 |
+
"year": 2021,
|
| 560 |
+
"month": 0,
|
| 561 |
+
"day": 0
|
| 562 |
+
},
|
| 563 |
+
"end": {
|
| 564 |
+
"year": 2024,
|
| 565 |
+
"month": 0,
|
| 566 |
+
"day": 0
|
| 567 |
+
}
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"companyId": 31404660,
|
| 571 |
+
"companyName": "VendorPM",
|
| 572 |
+
"companyUsername": "vendorpm",
|
| 573 |
+
"companyURL": "https://www.linkedin.com/company/vendorpm",
|
| 574 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQEglOi17VDAUg/company-logo_400_400/company-logo_400_400/0/1688751388502/vendorpm_logo?e=1733356800&v=beta&t=g36lpENHyHRFusF-KOJfJvh97fsAezDC1FAqYzG7zUg",
|
| 575 |
+
"companyIndustry": "Computer Software",
|
| 576 |
+
"companyStaffCountRange": "11 - 50",
|
| 577 |
+
"title": "Board Observer",
|
| 578 |
+
"multiLocaleTitle": null,
|
| 579 |
+
"multiLocaleCompanyName": null,
|
| 580 |
+
"location": "Toronto, Ontario, Canada",
|
| 581 |
+
"description": "",
|
| 582 |
+
"employmentType": "",
|
| 583 |
+
"start": {
|
| 584 |
+
"year": 2021,
|
| 585 |
+
"month": 0,
|
| 586 |
+
"day": 0
|
| 587 |
+
},
|
| 588 |
+
"end": {
|
| 589 |
+
"year": 2024,
|
| 590 |
+
"month": 0,
|
| 591 |
+
"day": 0
|
| 592 |
+
}
|
| 593 |
+
},
|
| 594 |
+
{
|
| 595 |
+
"companyId": 18872387,
|
| 596 |
+
"companyName": "Luxury Presence",
|
| 597 |
+
"companyUsername": "luxurypresence",
|
| 598 |
+
"companyURL": "https://www.linkedin.com/company/luxurypresence",
|
| 599 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQEv9eTo6golbw/company-logo_400_400/company-logo_400_400/0/1655397407777/luxurypresence_logo?e=1733356800&v=beta&t=SKKunif574CP6v1Z2LjF1sk6ZM254eu5VWN7ESZ1ylc",
|
| 600 |
+
"companyIndustry": "Computer Software",
|
| 601 |
+
"companyStaffCountRange": "501 - 1000",
|
| 602 |
+
"title": "Board Director",
|
| 603 |
+
"multiLocaleTitle": null,
|
| 604 |
+
"multiLocaleCompanyName": null,
|
| 605 |
+
"location": "Los Angeles Metropolitan Area",
|
| 606 |
+
"description": "",
|
| 607 |
+
"employmentType": "",
|
| 608 |
+
"start": {
|
| 609 |
+
"year": 2021,
|
| 610 |
+
"month": 0,
|
| 611 |
+
"day": 0
|
| 612 |
+
},
|
| 613 |
+
"end": {
|
| 614 |
+
"year": 2024,
|
| 615 |
+
"month": 0,
|
| 616 |
+
"day": 0
|
| 617 |
+
}
|
| 618 |
+
},
|
| 619 |
+
{
|
| 620 |
+
"companyId": 3157549,
|
| 621 |
+
"companyName": "ServiceTitan",
|
| 622 |
+
"companyUsername": "servicetitan",
|
| 623 |
+
"companyURL": "https://www.linkedin.com/company/servicetitan",
|
| 624 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQEZXBsypK75Fg/company-logo_400_400/company-logo_400_400/0/1660079842958/servicetitan_logo?e=1733356800&v=beta&t=Ly_gt1iDc0qfGCBWgwrUHqnamNy7TZMPEDARe-tlJVA",
|
| 625 |
+
"companyIndustry": "Computer Software",
|
| 626 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 627 |
+
"title": "Investor",
|
| 628 |
+
"multiLocaleTitle": null,
|
| 629 |
+
"multiLocaleCompanyName": null,
|
| 630 |
+
"location": "Los Angeles Metropolitan Area",
|
| 631 |
+
"description": "",
|
| 632 |
+
"employmentType": "",
|
| 633 |
+
"start": {
|
| 634 |
+
"year": 2021,
|
| 635 |
+
"month": 0,
|
| 636 |
+
"day": 0
|
| 637 |
+
},
|
| 638 |
+
"end": {
|
| 639 |
+
"year": 2024,
|
| 640 |
+
"month": 0,
|
| 641 |
+
"day": 0
|
| 642 |
+
}
|
| 643 |
+
},
|
| 644 |
+
{
|
| 645 |
+
"companyId": 18319862,
|
| 646 |
+
"companyName": "NALA",
|
| 647 |
+
"companyUsername": "nalamoney",
|
| 648 |
+
"companyURL": "https://www.linkedin.com/company/nalamoney",
|
| 649 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D4D0BAQGjdH4F8iQgGg/company-logo_400_400/company-logo_400_400/0/1704710437688/nalamoney_logo?e=1733356800&v=beta&t=vrKr_YzZMgCUv0XjRVWlWkxSBZLnNpl42wU87xvAZTM",
|
| 650 |
+
"companyIndustry": "Financial Services",
|
| 651 |
+
"companyStaffCountRange": "11 - 50",
|
| 652 |
+
"title": "Investor",
|
| 653 |
+
"multiLocaleTitle": null,
|
| 654 |
+
"multiLocaleCompanyName": null,
|
| 655 |
+
"location": "",
|
| 656 |
+
"description": "",
|
| 657 |
+
"employmentType": "",
|
| 658 |
+
"start": {
|
| 659 |
+
"year": 2021,
|
| 660 |
+
"month": 0,
|
| 661 |
+
"day": 0
|
| 662 |
+
},
|
| 663 |
+
"end": {
|
| 664 |
+
"year": 2024,
|
| 665 |
+
"month": 0,
|
| 666 |
+
"day": 0
|
| 667 |
+
}
|
| 668 |
+
},
|
| 669 |
+
{
|
| 670 |
+
"companyId": 70904927,
|
| 671 |
+
"companyName": "Jasper",
|
| 672 |
+
"companyUsername": "heyjasperai",
|
| 673 |
+
"companyURL": "https://www.linkedin.com/company/heyjasperai",
|
| 674 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQHsWD7LHz0Ekw/company-logo_400_400/company-logo_400_400/0/1719256081254?e=1733356800&v=beta&t=MknEiRcpHfjOrS4QK4FpHIisWPG7yvD1Mv8LFkrMl4k",
|
| 675 |
+
"companyIndustry": "Computer Software",
|
| 676 |
+
"companyStaffCountRange": "51 - 200",
|
| 677 |
+
"title": "Investor",
|
| 678 |
+
"multiLocaleTitle": null,
|
| 679 |
+
"multiLocaleCompanyName": null,
|
| 680 |
+
"location": "San Francisco Bay Area",
|
| 681 |
+
"description": "",
|
| 682 |
+
"employmentType": "",
|
| 683 |
+
"start": {
|
| 684 |
+
"year": 2021,
|
| 685 |
+
"month": 0,
|
| 686 |
+
"day": 0
|
| 687 |
+
},
|
| 688 |
+
"end": {
|
| 689 |
+
"year": 2024,
|
| 690 |
+
"month": 0,
|
| 691 |
+
"day": 0
|
| 692 |
+
}
|
| 693 |
+
},
|
| 694 |
+
{
|
| 695 |
+
"companyId": 18364911,
|
| 696 |
+
"companyName": "Shopmonkey",
|
| 697 |
+
"companyUsername": "shopmonkey",
|
| 698 |
+
"companyURL": "https://www.linkedin.com/company/shopmonkey",
|
| 699 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQGgBRI5FdMkug/company-logo_400_400/company-logo_400_400/0/1630537505248/shopmonkey_logo?e=1733356800&v=beta&t=4NG5tw0Oam6ezsJeCcvYl8dZ7OgnhOygV3f1Bl9WWyI",
|
| 700 |
+
"companyIndustry": "Computer Software",
|
| 701 |
+
"companyStaffCountRange": "51 - 200",
|
| 702 |
+
"title": "Board Observer",
|
| 703 |
+
"multiLocaleTitle": null,
|
| 704 |
+
"multiLocaleCompanyName": null,
|
| 705 |
+
"location": "San Francisco Bay Area",
|
| 706 |
+
"description": "",
|
| 707 |
+
"employmentType": "",
|
| 708 |
+
"start": {
|
| 709 |
+
"year": 2020,
|
| 710 |
+
"month": 0,
|
| 711 |
+
"day": 0
|
| 712 |
+
},
|
| 713 |
+
"end": {
|
| 714 |
+
"year": 2024,
|
| 715 |
+
"month": 0,
|
| 716 |
+
"day": 0
|
| 717 |
+
}
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"companyId": 1292340,
|
| 721 |
+
"companyName": "Syndio",
|
| 722 |
+
"companyUsername": "syndioinc",
|
| 723 |
+
"companyURL": "https://www.linkedin.com/company/syndioinc",
|
| 724 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQEDBc5ZaUmvWQ/company-logo_400_400/company-logo_400_400/0/1681938104635/syndioinc_logo?e=1733356800&v=beta&t=qIFwJgc8zf8xSgpjGqxuNmPm4HgsndG4AYQ1b61x3zM",
|
| 725 |
+
"companyIndustry": "Computer Software",
|
| 726 |
+
"companyStaffCountRange": "51 - 200",
|
| 727 |
+
"title": "Investor",
|
| 728 |
+
"multiLocaleTitle": null,
|
| 729 |
+
"multiLocaleCompanyName": null,
|
| 730 |
+
"location": "",
|
| 731 |
+
"description": "",
|
| 732 |
+
"employmentType": "",
|
| 733 |
+
"start": {
|
| 734 |
+
"year": 2020,
|
| 735 |
+
"month": 0,
|
| 736 |
+
"day": 0
|
| 737 |
+
},
|
| 738 |
+
"end": {
|
| 739 |
+
"year": 2024,
|
| 740 |
+
"month": 0,
|
| 741 |
+
"day": 0
|
| 742 |
+
}
|
| 743 |
+
},
|
| 744 |
+
{
|
| 745 |
+
"companyId": 18742807,
|
| 746 |
+
"companyName": "TRM Labs",
|
| 747 |
+
"companyUsername": "trmlabs",
|
| 748 |
+
"companyURL": "https://www.linkedin.com/company/trmlabs",
|
| 749 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4E0BAQGk-xkrdUoBpg/company-logo_400_400/company-logo_400_400/0/1638543126715/trmlabs_logo?e=1733356800&v=beta&t=7uFH1kBZBJRaHtBSTG3jq6ChQlI1ZQM3XoZIGQyBq7Y",
|
| 750 |
+
"companyIndustry": "Information Services",
|
| 751 |
+
"companyStaffCountRange": "201 - 500",
|
| 752 |
+
"title": "Board Observer",
|
| 753 |
+
"multiLocaleTitle": null,
|
| 754 |
+
"multiLocaleCompanyName": null,
|
| 755 |
+
"location": "San Francisco Bay Area",
|
| 756 |
+
"description": "",
|
| 757 |
+
"employmentType": "",
|
| 758 |
+
"start": {
|
| 759 |
+
"year": 2020,
|
| 760 |
+
"month": 0,
|
| 761 |
+
"day": 0
|
| 762 |
+
},
|
| 763 |
+
"end": {
|
| 764 |
+
"year": 2024,
|
| 765 |
+
"month": 0,
|
| 766 |
+
"day": 0
|
| 767 |
+
}
|
| 768 |
+
},
|
| 769 |
+
{
|
| 770 |
+
"companyId": 5903162,
|
| 771 |
+
"companyName": "Sila Nanotechnologies Inc.",
|
| 772 |
+
"companyUsername": "sila-nanotechnologies-inc-",
|
| 773 |
+
"companyURL": "https://www.linkedin.com/company/sila-nanotechnologies-inc-",
|
| 774 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQF7zqbzbyb9pQ/company-logo_400_400/company-logo_400_400/0/1639412633827/sila_nanotechnologies_inc__logo?e=1733356800&v=beta&t=2ei9-L8B4AfuZaEgu7zoHhu098YYcL2b5xUcsPMuwho",
|
| 775 |
+
"companyIndustry": "Renewables & Environment",
|
| 776 |
+
"companyStaffCountRange": "201 - 500",
|
| 777 |
+
"title": "Investor",
|
| 778 |
+
"multiLocaleTitle": null,
|
| 779 |
+
"multiLocaleCompanyName": null,
|
| 780 |
+
"location": "San Francisco Bay Areas",
|
| 781 |
+
"description": "",
|
| 782 |
+
"employmentType": "",
|
| 783 |
+
"start": {
|
| 784 |
+
"year": 2020,
|
| 785 |
+
"month": 0,
|
| 786 |
+
"day": 0
|
| 787 |
+
},
|
| 788 |
+
"end": {
|
| 789 |
+
"year": 2024,
|
| 790 |
+
"month": 0,
|
| 791 |
+
"day": 0
|
| 792 |
+
}
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"companyId": 18960490,
|
| 796 |
+
"companyName": "Courier",
|
| 797 |
+
"companyUsername": "trycourier",
|
| 798 |
+
"companyURL": "https://www.linkedin.com/company/trycourier",
|
| 799 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQGhsLWH5Oz1PA/company-logo_400_400/company-logo_400_400/0/1718748389821/trycourier_logo?e=1733356800&v=beta&t=fqs9mj9F17GSAQhALd6Et-BSRsZMxT51GrGLWStPcjg",
|
| 800 |
+
"companyIndustry": "Computer Software",
|
| 801 |
+
"companyStaffCountRange": "11 - 50",
|
| 802 |
+
"title": "Board Observer",
|
| 803 |
+
"multiLocaleTitle": null,
|
| 804 |
+
"multiLocaleCompanyName": null,
|
| 805 |
+
"location": "San Francisco Bay Area",
|
| 806 |
+
"description": "",
|
| 807 |
+
"employmentType": "",
|
| 808 |
+
"start": {
|
| 809 |
+
"year": 2019,
|
| 810 |
+
"month": 0,
|
| 811 |
+
"day": 0
|
| 812 |
+
},
|
| 813 |
+
"end": {
|
| 814 |
+
"year": 2024,
|
| 815 |
+
"month": 0,
|
| 816 |
+
"day": 0
|
| 817 |
+
}
|
| 818 |
+
},
|
| 819 |
+
{
|
| 820 |
+
"companyId": 5324440,
|
| 821 |
+
"companyName": "Productboard",
|
| 822 |
+
"companyUsername": "productboard",
|
| 823 |
+
"companyURL": "https://www.linkedin.com/company/productboard",
|
| 824 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQGNdwi0nLUPTg/company-logo_400_400/company-logo_400_400/0/1719256300453/productboard_logo?e=1733356800&v=beta&t=2cf3ZSkvsgxTY1dqhR-bl32uJgIiV7nmcIvqAIn9VTw",
|
| 825 |
+
"companyIndustry": "Computer Software",
|
| 826 |
+
"companyStaffCountRange": "201 - 500",
|
| 827 |
+
"title": "Board Observer",
|
| 828 |
+
"multiLocaleTitle": null,
|
| 829 |
+
"multiLocaleCompanyName": null,
|
| 830 |
+
"location": "",
|
| 831 |
+
"description": "",
|
| 832 |
+
"employmentType": "",
|
| 833 |
+
"start": {
|
| 834 |
+
"year": 2019,
|
| 835 |
+
"month": 0,
|
| 836 |
+
"day": 0
|
| 837 |
+
},
|
| 838 |
+
"end": {
|
| 839 |
+
"year": 2024,
|
| 840 |
+
"month": 0,
|
| 841 |
+
"day": 0
|
| 842 |
+
}
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"companyId": 6588485,
|
| 846 |
+
"companyName": "Guild",
|
| 847 |
+
"companyUsername": "guildeducation",
|
| 848 |
+
"companyURL": "https://www.linkedin.com/company/guildeducation",
|
| 849 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/D560BAQFQjfjzaP-wmA/company-logo_400_400/company-logo_400_400/0/1681301994500/guildeducation_logo?e=1733356800&v=beta&t=8eSaHxcOPerDLCZtagOPVDl5rmeBNY0TDiSK0Jiu1GY",
|
| 850 |
+
"companyIndustry": "Higher Education",
|
| 851 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 852 |
+
"title": "Investor",
|
| 853 |
+
"multiLocaleTitle": null,
|
| 854 |
+
"multiLocaleCompanyName": null,
|
| 855 |
+
"location": "Denver Metropolitan Area",
|
| 856 |
+
"description": "",
|
| 857 |
+
"employmentType": "",
|
| 858 |
+
"start": {
|
| 859 |
+
"year": 2019,
|
| 860 |
+
"month": 0,
|
| 861 |
+
"day": 0
|
| 862 |
+
},
|
| 863 |
+
"end": {
|
| 864 |
+
"year": 2024,
|
| 865 |
+
"month": 0,
|
| 866 |
+
"day": 0
|
| 867 |
+
}
|
| 868 |
+
},
|
| 869 |
+
{
|
| 870 |
+
"companyId": 2850862,
|
| 871 |
+
"companyName": "Canva",
|
| 872 |
+
"companyUsername": "canva",
|
| 873 |
+
"companyURL": "https://www.linkedin.com/company/canva",
|
| 874 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C4D0BAQGO1uzGzmVB-A/company-logo_400_400/company-logo_400_400/0/1656630679668/canva_logo?e=1733356800&v=beta&t=f-EU-W2e0K0IW1-sDDiwpYG0GsRBK2Nh2UX__pbjF38",
|
| 875 |
+
"companyIndustry": "Computer Software",
|
| 876 |
+
"companyStaffCountRange": "1001 - 5000",
|
| 877 |
+
"title": "Investor",
|
| 878 |
+
"multiLocaleTitle": null,
|
| 879 |
+
"multiLocaleCompanyName": null,
|
| 880 |
+
"location": "",
|
| 881 |
+
"description": "",
|
| 882 |
+
"employmentType": "",
|
| 883 |
+
"start": {
|
| 884 |
+
"year": 2019,
|
| 885 |
+
"month": 0,
|
| 886 |
+
"day": 0
|
| 887 |
+
},
|
| 888 |
+
"end": {
|
| 889 |
+
"year": 2024,
|
| 890 |
+
"month": 0,
|
| 891 |
+
"day": 0
|
| 892 |
+
}
|
| 893 |
+
}
|
| 894 |
+
],
|
| 895 |
+
"skills": [
|
| 896 |
+
{
|
| 897 |
+
"name": "Project Planning",
|
| 898 |
+
"passedSkillAssessment": false,
|
| 899 |
+
"endorsementsCount": 2
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"name": "Research",
|
| 903 |
+
"passedSkillAssessment": false,
|
| 904 |
+
"endorsementsCount": 14
|
| 905 |
+
},
|
| 906 |
+
{
|
| 907 |
+
"name": "Project Exec",
|
| 908 |
+
"passedSkillAssessment": false
|
| 909 |
+
},
|
| 910 |
+
{
|
| 911 |
+
"name": "Strategic Planning",
|
| 912 |
+
"passedSkillAssessment": false
|
| 913 |
+
},
|
| 914 |
+
{
|
| 915 |
+
"name": "Market Research",
|
| 916 |
+
"passedSkillAssessment": false,
|
| 917 |
+
"endorsementsCount": 6
|
| 918 |
+
},
|
| 919 |
+
{
|
| 920 |
+
"name": "Nonprofits",
|
| 921 |
+
"passedSkillAssessment": false,
|
| 922 |
+
"endorsementsCount": 11
|
| 923 |
+
},
|
| 924 |
+
{
|
| 925 |
+
"name": "Leadership",
|
| 926 |
+
"passedSkillAssessment": false,
|
| 927 |
+
"endorsementsCount": 9
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"name": "Strategy",
|
| 931 |
+
"passedSkillAssessment": false,
|
| 932 |
+
"endorsementsCount": 23
|
| 933 |
+
},
|
| 934 |
+
{
|
| 935 |
+
"name": "Entrepreneurship",
|
| 936 |
+
"passedSkillAssessment": false,
|
| 937 |
+
"endorsementsCount": 7
|
| 938 |
+
},
|
| 939 |
+
{
|
| 940 |
+
"name": "Economics",
|
| 941 |
+
"passedSkillAssessment": false,
|
| 942 |
+
"endorsementsCount": 3
|
| 943 |
+
},
|
| 944 |
+
{
|
| 945 |
+
"name": "Public Speaking",
|
| 946 |
+
"passedSkillAssessment": false,
|
| 947 |
+
"endorsementsCount": 16
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"name": "Data Analysis",
|
| 951 |
+
"passedSkillAssessment": false,
|
| 952 |
+
"endorsementsCount": 8
|
| 953 |
+
},
|
| 954 |
+
{
|
| 955 |
+
"name": "Fundraising",
|
| 956 |
+
"passedSkillAssessment": false,
|
| 957 |
+
"endorsementsCount": 4
|
| 958 |
+
},
|
| 959 |
+
{
|
| 960 |
+
"name": "Venture Capital",
|
| 961 |
+
"passedSkillAssessment": false,
|
| 962 |
+
"endorsementsCount": 3
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"name": "Spanish",
|
| 966 |
+
"passedSkillAssessment": false,
|
| 967 |
+
"endorsementsCount": 4
|
| 968 |
+
},
|
| 969 |
+
{
|
| 970 |
+
"name": "Financial Modeling",
|
| 971 |
+
"passedSkillAssessment": false,
|
| 972 |
+
"endorsementsCount": 2
|
| 973 |
+
}
|
| 974 |
+
],
|
| 975 |
+
"givenRecommendation": null,
|
| 976 |
+
"givenRecommendationCount": 0,
|
| 977 |
+
"receivedRecommendation": null,
|
| 978 |
+
"receivedRecommendationCount": 0,
|
| 979 |
+
"courses": null,
|
| 980 |
+
"certifications": null,
|
| 981 |
+
"honors": null,
|
| 982 |
+
"projects": {
|
| 983 |
+
"total": 0,
|
| 984 |
+
"items": null
|
| 985 |
+
},
|
| 986 |
+
"volunteering": [
|
| 987 |
+
{
|
| 988 |
+
"title": "Venture Capital Advisor & Mentor",
|
| 989 |
+
"start": {
|
| 990 |
+
"year": 2011,
|
| 991 |
+
"month": 11,
|
| 992 |
+
"day": 0
|
| 993 |
+
},
|
| 994 |
+
"end": {
|
| 995 |
+
"year": 2015,
|
| 996 |
+
"month": 10,
|
| 997 |
+
"day": 0
|
| 998 |
+
},
|
| 999 |
+
"companyName": "BUILD",
|
| 1000 |
+
"CompanyId": "513390",
|
| 1001 |
+
"companyUrl": "https://www.linkedin.com/company/513390",
|
| 1002 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQGT3pm5lHpqVQ/company-logo_400_400/company-logo_400_400/0/1630640162308/build_logo?e=1733356800&v=beta&t=u9inGRmQwmaTL-DDgPc6nPPqVI630FhJ-PSK_xil_Vw"
|
| 1003 |
+
},
|
| 1004 |
+
{
|
| 1005 |
+
"title": "Alumni Board Member, Mentor",
|
| 1006 |
+
"start": {
|
| 1007 |
+
"year": 2012,
|
| 1008 |
+
"month": 11,
|
| 1009 |
+
"day": 0
|
| 1010 |
+
},
|
| 1011 |
+
"end": {
|
| 1012 |
+
"year": 2016,
|
| 1013 |
+
"month": 8,
|
| 1014 |
+
"day": 0
|
| 1015 |
+
},
|
| 1016 |
+
"companyName": "New Sector Alliance",
|
| 1017 |
+
"CompanyId": "24636",
|
| 1018 |
+
"companyUrl": "https://www.linkedin.com/company/24636",
|
| 1019 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQFXSfHrJYz7Cw/company-logo_400_400/company-logo_400_400/0/1669748969129/new_sector_alliance_logo?e=1733356800&v=beta&t=-G5ciTX5xibgKUH4ox1ABzNNtReauNHLlmYAMglU7nA"
|
| 1020 |
+
},
|
| 1021 |
+
{
|
| 1022 |
+
"title": "Mentor",
|
| 1023 |
+
"start": {
|
| 1024 |
+
"year": 2014,
|
| 1025 |
+
"month": 9,
|
| 1026 |
+
"day": 0
|
| 1027 |
+
},
|
| 1028 |
+
"end": {
|
| 1029 |
+
"year": 2015,
|
| 1030 |
+
"month": 9,
|
| 1031 |
+
"day": 0
|
| 1032 |
+
},
|
| 1033 |
+
"companyName": "Year Up",
|
| 1034 |
+
"CompanyId": "28447",
|
| 1035 |
+
"companyUrl": "https://www.linkedin.com/company/28447",
|
| 1036 |
+
"companyLogo": "https://media.licdn.com/dms/image/v2/C560BAQFsfLcEADumZg/company-logo_400_400/company-logo_400_400/0/1631350820683?e=1733356800&v=beta&t=-GRlO3yQYdYJUK6a6iP_jRzhVKartf6FaZ0sxKOQH1g"
|
| 1037 |
+
}
|
| 1038 |
+
],
|
| 1039 |
+
"supportedLocales": [
|
| 1040 |
+
{
|
| 1041 |
+
"country": "US",
|
| 1042 |
+
"language": "en"
|
| 1043 |
+
}
|
| 1044 |
+
],
|
| 1045 |
+
"multiLocaleFirstName": {
|
| 1046 |
+
"en": "Hansae"
|
| 1047 |
+
},
|
| 1048 |
+
"multiLocaleLastName": {
|
| 1049 |
+
"en": "Catlett"
|
| 1050 |
+
},
|
| 1051 |
+
"multiLocaleHeadline": {
|
| 1052 |
+
"en": "Partner at HOF Capital | Co-founder & GP at The MBA Fund"
|
| 1053 |
+
}
|
| 1054 |
+
}
|
template/src/notebooks/education_classifier.ipynb
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import json\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"from template.app.model.linkedin.linkedin_models import profile_from_json\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"with open(\"data/hansae_catlett.json\") as f:\n",
|
| 14 |
+
" data = json.load(f)\n",
|
| 15 |
+
" # convert to linkedin profile\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"profile = profile_from_json(data)"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": 3,
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"outputs": [
|
| 25 |
+
{
|
| 26 |
+
"name": "stdout",
|
| 27 |
+
"output_type": "stream",
|
| 28 |
+
"text": [
|
| 29 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 30 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1069\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m122\u001b[0m\n",
|
| 31 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 32 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1023\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m117\u001b[0m\n",
|
| 33 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 34 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m988\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m145\u001b[0m\n",
|
| 35 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 36 |
+
"\u001b[2m2024-09-07 15:31:47\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1031\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m127\u001b[0m\n",
|
| 37 |
+
"{\n",
|
| 38 |
+
" \"output\": \"MBA\",\n",
|
| 39 |
+
" \"confidence\": 1.0,\n",
|
| 40 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate obtained a Master of Business Administration (M.B.A.) from Stanford University Graduate School of Business, with a period of study from 2016 to 2019. This matches exactly with the educational background provided in the full resume, which also lists the same degree and institution. The presence of additional activities and awards in the LinkedIn item further supports the classification as an MBA, confirming the candidate's engagement and accomplishments during their studies. Therefore, the classification is confidently categorized as \\\"MBA.\\\"\"\n",
|
| 41 |
+
"}\n",
|
| 42 |
+
"{\n",
|
| 43 |
+
" \"output\": \"Graduate School\",\n",
|
| 44 |
+
" \"confidence\": 1.0,\n",
|
| 45 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate obtained a Master of Public Policy (M.P.P.) from Stanford University, which aligns perfectly with the information provided in the full resume. The degree is a graduate-level qualification, and the period of study (2016 to 2019) matches the timeline in the resume. Additionally, the description of the candidate's work during this program further supports the classification as a graduate school education. Therefore, it is accurately classified as GRAD_SCHOOL.\"\n",
|
| 46 |
+
"}\n",
|
| 47 |
+
"{\n",
|
| 48 |
+
" \"output\": \"Undergraduate (Incomplete)\",\n",
|
| 49 |
+
" \"confidence\": 0.9,\n",
|
| 50 |
+
" \"reasoning\": \"The specific LinkedIn education item indicates that the candidate participated in a \\\"Study Abroad\\\" program at the University of New South Wales, which is typically associated with undergraduate studies. The resume shows that the candidate completed a Bachelor of Arts at Harvard University, and the study abroad experience aligns with the undergraduate level of education. Since the degree type is explicitly labeled as \\\"Study Abroad,\\\" it falls under the category of \\\"Undergraduate (Incomplete)\\\" as it does not represent a completed degree but rather an additional educational experience during their undergraduate studies. The confidence level is high due to the clear context provided by both the resume and the LinkedIn item.\"\n",
|
| 51 |
+
"}\n",
|
| 52 |
+
"{\n",
|
| 53 |
+
" \"output\": \"Undergraduate (Completed)\",\n",
|
| 54 |
+
" \"confidence\": 1.0,\n",
|
| 55 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate earned a Bachelor of Arts degree in Biomedical Engineering and Philosophy from Harvard University, with a graduation period from 2007 to 2011. This aligns perfectly with the information provided in the full resume, which also lists the same degree and institution, confirming its completion. Since the degree is a completed undergraduate degree, it fits squarely into the \\\"Undergraduate (Completed)\\\" category. The additional details about graduating with honors and involvement in various activities further support the classification as a completed undergraduate education.\"\n",
|
| 56 |
+
"}\n",
|
| 57 |
+
"{\n",
|
| 58 |
+
" \"output\": \"MBA\",\n",
|
| 59 |
+
" \"confidence\": 1.0,\n",
|
| 60 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate obtained a Master of Business Administration (M.B.A.) from Stanford University Graduate School of Business, with a period of study from 2016 to 2019. This matches exactly with the educational background provided in the full resume, which also lists the same degree and institution. The presence of additional activities and awards in the LinkedIn item further supports the classification as an MBA, confirming the candidate's engagement and accomplishments during their studies. Therefore, the classification is confidently categorized as \\\"MBA.\\\"\"\n",
|
| 61 |
+
"}\n",
|
| 62 |
+
"{\n",
|
| 63 |
+
" \"output\": \"Graduate School\",\n",
|
| 64 |
+
" \"confidence\": 1.0,\n",
|
| 65 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate obtained a Master of Public Policy (M.P.P.) from Stanford University, which aligns perfectly with the information provided in the full resume. The degree is a graduate-level qualification, and the period of study (2016 to 2019) matches the timeline in the resume. Additionally, the description of the candidate's work during this program further supports the classification as a graduate school education. Therefore, it is accurately classified as GRAD_SCHOOL.\"\n",
|
| 66 |
+
"}\n",
|
| 67 |
+
"{\n",
|
| 68 |
+
" \"output\": \"Undergraduate (Incomplete)\",\n",
|
| 69 |
+
" \"confidence\": 0.9,\n",
|
| 70 |
+
" \"reasoning\": \"The specific LinkedIn education item indicates that the candidate participated in a \\\"Study Abroad\\\" program at the University of New South Wales, which is typically associated with undergraduate studies. The resume shows that the candidate completed a Bachelor of Arts at Harvard University, and the study abroad experience aligns with the undergraduate level of education. Since the degree type is explicitly labeled as \\\"Study Abroad,\\\" it falls under the category of \\\"Undergraduate (Incomplete)\\\" as it does not represent a completed degree but rather an additional educational experience during their undergraduate studies. The confidence level is high due to the clear context provided by both the resume and the LinkedIn item.\"\n",
|
| 71 |
+
"}\n",
|
| 72 |
+
"{\n",
|
| 73 |
+
" \"output\": \"Undergraduate (Completed)\",\n",
|
| 74 |
+
" \"confidence\": 1.0,\n",
|
| 75 |
+
" \"reasoning\": \"The specific LinkedIn education item clearly states that the candidate earned a Bachelor of Arts degree in Biomedical Engineering and Philosophy from Harvard University, with a graduation period from 2007 to 2011. This aligns perfectly with the information provided in the full resume, which also lists the same degree and institution, confirming its completion. Since the degree is a completed undergraduate degree, it fits squarely into the \\\"Undergraduate (Completed)\\\" category. The additional details about graduating with honors and involvement in various activities further support the classification as a completed undergraduate education.\"\n",
|
| 76 |
+
"}\n"
|
| 77 |
+
]
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"source": [
|
| 81 |
+
"import asyncio\n",
|
| 82 |
+
"\n",
|
| 83 |
+
"from template.app.education_classifier import EducationClassifier\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"education_classifier = EducationClassifier()\n",
|
| 86 |
+
"\n",
|
| 87 |
+
"all_educations_classified = []\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"async def classify_education(profile, e):\n",
|
| 91 |
+
" classification = await education_classifier.classify_education(profile, e)\n",
|
| 92 |
+
" all_educations_classified.append(classification)\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"await asyncio.gather(*[classify_education(profile, e) for e in profile.educations])\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"for e in all_educations_classified:\n",
|
| 98 |
+
" print(e.model_dump_json(indent=2))"
|
| 99 |
+
]
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"cell_type": "code",
|
| 103 |
+
"execution_count": null,
|
| 104 |
+
"metadata": {},
|
| 105 |
+
"outputs": [],
|
| 106 |
+
"source": []
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"cell_type": "code",
|
| 110 |
+
"execution_count": null,
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"outputs": [],
|
| 113 |
+
"source": []
|
| 114 |
+
}
|
| 115 |
+
],
|
| 116 |
+
"metadata": {
|
| 117 |
+
"kernelspec": {
|
| 118 |
+
"display_name": ".venv",
|
| 119 |
+
"language": "python",
|
| 120 |
+
"name": "python3"
|
| 121 |
+
},
|
| 122 |
+
"language_info": {
|
| 123 |
+
"codemirror_mode": {
|
| 124 |
+
"name": "ipython",
|
| 125 |
+
"version": 3
|
| 126 |
+
},
|
| 127 |
+
"file_extension": ".py",
|
| 128 |
+
"mimetype": "text/x-python",
|
| 129 |
+
"name": "python",
|
| 130 |
+
"nbconvert_exporter": "python",
|
| 131 |
+
"pygments_lexer": "ipython3",
|
| 132 |
+
"version": "3.12.2"
|
| 133 |
+
}
|
| 134 |
+
},
|
| 135 |
+
"nbformat": 4,
|
| 136 |
+
"nbformat_minor": 2
|
| 137 |
+
}
|
template/src/notebooks/work_experience_classifier.ipynb
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import json\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"from template.app.model.linkedin.linkedin_models import profile_from_json\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"with open(\"data/hansae_catlett.json\") as f:\n",
|
| 14 |
+
" data = json.load(f)\n",
|
| 15 |
+
" # convert to linkedin profile\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"profile = profile_from_json(data)"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": 2,
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"outputs": [
|
| 25 |
+
{
|
| 26 |
+
"name": "stdout",
|
| 27 |
+
"output_type": "stream",
|
| 28 |
+
"text": [
|
| 29 |
+
"\u001b[2m2024-09-07 17:32:34\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 30 |
+
"\u001b[2m2024-09-07 17:32:34\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 31 |
+
"\u001b[2m2024-09-07 17:32:34\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 32 |
+
"\u001b[2m2024-09-07 17:32:34\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 33 |
+
"\u001b[2m2024-09-07 17:32:35\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 34 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 35 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1350\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m115\u001b[0m\n",
|
| 36 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 37 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1351\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
|
| 38 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 39 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1548\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m123\u001b[0m\n",
|
| 40 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 41 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1353\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m108\u001b[0m\n",
|
| 42 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 43 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1352\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m121\u001b[0m\n",
|
| 44 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 45 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1355\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m111\u001b[0m\n",
|
| 46 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 47 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1355\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m115\u001b[0m\n",
|
| 48 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 49 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1354\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m137\u001b[0m\n",
|
| 50 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1386\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m139\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-07 17:32:37\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1354\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m127\u001b[0m\n",
|
| 54 |
+
"{\n",
|
| 55 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 56 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 57 |
+
" \"confidence\": 0.9,\n",
|
| 58 |
+
" \"reasoning\": \"The work experience as an \\\"Investor\\\" at Spot AI indicates a role focused on investing in a company, which aligns with the secondary job type of INVESTING. The primary job type is classified as OTHER because this position does not represent a full-time role or a traditional employment structure, but rather an investment role that is likely part-time or project-based. The candidate's extensive background in venture capital and investment roles, as seen in their resume, supports this classification.\"\n",
|
| 59 |
+
"}\n",
|
| 60 |
+
"{\n",
|
| 61 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 62 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 63 |
+
" \"confidence\": 0.9,\n",
|
| 64 |
+
" \"reasoning\": \"The work experience at Archy is classified as FULL_TIME because it is a professional role held from 2022 to 2024, indicating ongoing employment. The title \\\"Investor\\\" aligns with the secondary job type of INVESTING, as it directly relates to the candidate's role in a venture capital context, which is supported by their extensive experience in investment roles, including their position as Vice President at Bessemer Venture Partners and Co-Founder at The MBA Fund.\"\n",
|
| 65 |
+
"}\n",
|
| 66 |
+
"{\n",
|
| 67 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 68 |
+
" \"secondary_job_type\": \"Entrepreneur / Founder\",\n",
|
| 69 |
+
" \"confidence\": 0.9,\n",
|
| 70 |
+
" \"reasoning\": \"The work experience item indicates that the candidate is a Co-Founder and General Partner at The MBA Fund, which is a venture capital firm. This role suggests ongoing, full-time involvement in a professional capacity, classifying it as FULL_TIME. Additionally, being a Co-Founder aligns with the ENTREPRENEUR_FOUNDER category, as it involves starting and managing a business. The candidate's extensive background in venture capital, as seen in their resume, further supports this classification.\"\n",
|
| 71 |
+
"}\n",
|
| 72 |
+
"{\n",
|
| 73 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 74 |
+
" \"secondary_job_type\": \"Other\",\n",
|
| 75 |
+
" \"confidence\": 0.9,\n",
|
| 76 |
+
" \"reasoning\": \"The role of \\\"Board Observer\\\" at MaintainX is not a full-time position and does not fit into traditional employment categories like full-time or internship. It is more of an advisory role, but since it does not involve direct investment or operational responsibilities, it is classified as \\\"OTHER.\\\" The candidate's extensive experience in venture capital and board roles supports this classification, as they are involved in oversight rather than direct management or operational tasks.\"\n",
|
| 77 |
+
"}\n",
|
| 78 |
+
"{\n",
|
| 79 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 80 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 81 |
+
" \"confidence\": 0.9,\n",
|
| 82 |
+
" \"reasoning\": \"The work experience as a Board Observer at VendorPM is not a full-time role but rather a position that involves oversight and advisory responsibilities, which aligns with the OTHER category. Additionally, the candidate's extensive background in venture capital and investing, as evidenced by their roles at HOF Capital and Bessemer Venture Partners, supports classifying this experience under INVESTING. The confidence level is high due to the clear alignment of the role with investment activities, despite the lack of a traditional employment structure.\"\n",
|
| 83 |
+
"}\n",
|
| 84 |
+
"{\n",
|
| 85 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 86 |
+
" \"secondary_job_type\": \"Other\",\n",
|
| 87 |
+
" \"confidence\": 0.9,\n",
|
| 88 |
+
" \"reasoning\": \"The work experience as a Board Director at Luxury Presence is classified as \\\"OTHER\\\" for both primary and secondary job types because it does not fit the traditional definitions of full-time employment or any specific professional role like investing or consulting. The position is more of a governance role rather than a direct employment position, and it involves oversight rather than operational responsibilities. The candidate's resume indicates a strong background in venture capital and board roles, which supports this classification.\"\n",
|
| 89 |
+
"}\n",
|
| 90 |
+
"{\n",
|
| 91 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 92 |
+
" \"secondary_job_type\": \"Consulting\",\n",
|
| 93 |
+
" \"confidence\": 0.8,\n",
|
| 94 |
+
" \"reasoning\": \"The role of \\\"Board Observer\\\" at Rillavoice is not a full-time position but rather an advisory role, which aligns it with the OTHER category for primary job type. The responsibilities typically associated with board observer roles often involve providing strategic guidance and insights, which can be classified under CONSULTING for the secondary job type. The candidate's extensive experience in venture capital and advisory roles supports this classification, as they are likely leveraging their expertise to guide the company.\"\n",
|
| 95 |
+
"}\n",
|
| 96 |
+
"{\n",
|
| 97 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 98 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 99 |
+
" \"confidence\": 0.9,\n",
|
| 100 |
+
" \"reasoning\": \"The candidate's role as a Partner at HOF Capital, which is classified under Venture Capital & Private Equity, indicates a full-time position in a professional investing capacity. This aligns with their extensive background in venture capital, as evidenced by their previous role as Vice President at Bessemer Venture Partners and their co-founding of The MBA Fund. The combination of these roles supports the classification of both primary and secondary job types as FULL_TIME and INVESTING, respectively. The confidence level is high due to the clear alignment of the job title and industry with the definitions provided.\"\n",
|
| 101 |
+
"}\n",
|
| 102 |
+
"{\n",
|
| 103 |
+
" \"primary_job_type\": \"Full-time\",\n",
|
| 104 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 105 |
+
" \"confidence\": 1.0,\n",
|
| 106 |
+
" \"reasoning\": \"The work experience item indicates that the candidate held a full-time position as Vice President at Bessemer Venture Partners, which is a venture capital firm. This aligns with the primary job type classification of FULL_TIME. Additionally, the role is clearly within the realm of professional investing, as Bessemer Venture Partners operates in the venture capital and private equity industry, thus categorizing it as INVESTING for the secondary job type. The candidate's extensive background in venture capital, including their current role and previous experience as a co-founder of a venture capital firm, further supports this classification.\"\n",
|
| 107 |
+
"}\n",
|
| 108 |
+
"{\n",
|
| 109 |
+
" \"primary_job_type\": \"Other\",\n",
|
| 110 |
+
" \"secondary_job_type\": \"Investing\",\n",
|
| 111 |
+
" \"confidence\": 0.9,\n",
|
| 112 |
+
" \"reasoning\": \"The work experience item lists the candidate as an \\\"Investor\\\" at ServiceTitan, which aligns with their roles in venture capital and investment as seen in their resume. Given that this position does not indicate a full-time employment status and lacks a detailed description, it is classified as \\\"OTHER.\\\" However, the nature of the role clearly fits within the \\\"INVESTING\\\" category, as it involves investment activities in a software company. The confidence level is high due to the clear alignment with the candidate's investment-focused career trajectory.\"\n",
|
| 113 |
+
"}\n"
|
| 114 |
+
]
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"source": [
|
| 118 |
+
"import asyncio\n",
|
| 119 |
+
"\n",
|
| 120 |
+
"from template.app.work_experience_classifier import WorkExperienceClassifier\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"work_experience_classifier = WorkExperienceClassifier()\n",
|
| 123 |
+
"\n",
|
| 124 |
+
"all_positions_classified = []\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"async def classify_work_experience(profile, e):\n",
|
| 128 |
+
" classification = await work_experience_classifier.classify_work_experience(profile, e)\n",
|
| 129 |
+
" all_positions_classified.append(classification)\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"await asyncio.gather(*[classify_work_experience(profile, w) for w in profile.positions])\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"for e in all_positions_classified:\n",
|
| 135 |
+
" print(e.model_dump_json(indent=2))"
|
| 136 |
+
]
|
| 137 |
+
}
|
| 138 |
+
],
|
| 139 |
+
"metadata": {
|
| 140 |
+
"kernelspec": {
|
| 141 |
+
"display_name": ".venv",
|
| 142 |
+
"language": "python",
|
| 143 |
+
"name": "python3"
|
| 144 |
+
},
|
| 145 |
+
"language_info": {
|
| 146 |
+
"codemirror_mode": {
|
| 147 |
+
"name": "ipython",
|
| 148 |
+
"version": 3
|
| 149 |
+
},
|
| 150 |
+
"file_extension": ".py",
|
| 151 |
+
"mimetype": "text/x-python",
|
| 152 |
+
"name": "python",
|
| 153 |
+
"nbconvert_exporter": "python",
|
| 154 |
+
"pygments_lexer": "ipython3",
|
| 155 |
+
"version": "3.12.2"
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
"nbformat": 4,
|
| 159 |
+
"nbformat_minor": 2
|
| 160 |
+
}
|
template/src/template/__init__.py
ADDED
|
File without changes
|
template/src/template/app/__init__.py
ADDED
|
File without changes
|
template/src/template/app/bindings.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from template.app.prompts.prompt_loader import PromptLoader
from template.llm.openai.openai import AsyncOpenAIService
from template.llm.openai.openai_model import OpenAIModel

# Module-level singletons used as default dependencies (a lightweight form of
# dependency injection): classifier classes take these as default constructor
# arguments so tests can substitute their own instances.
prompt_loader = PromptLoader()
open_ai_service = AsyncOpenAIService(OpenAIModel.GPT_4_MINI)
|
template/src/template/app/education_classifier.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
|
| 5 |
+
from template.app import bindings
|
| 6 |
+
from template.app.model.linkedin.linkedin_formatters import format_education, format_profile_as_resume
|
| 7 |
+
from template.app.model.linkedin.linkedin_models import Education, LinkedinProfile
|
| 8 |
+
from template.app.prompts.prompt_loader import PromptLoader
|
| 9 |
+
from template.llm.llm_service import LLMService
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SchoolType(Enum):
|
| 13 |
+
"""
|
| 14 |
+
Enumeration of different school types for education classification.
|
| 15 |
+
|
| 16 |
+
This enum represents various levels and types of educational institutions.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
PRIMARY_SECONDARY = "Primary / Secondary School"
|
| 20 |
+
UNDERGRAD_INCOMPLETE = "Undergraduate (Incomplete)"
|
| 21 |
+
UNDERGRAD_COMPLETED = "Undergraduate (Completed)"
|
| 22 |
+
MBA = "MBA"
|
| 23 |
+
LAW_SCHOOL = "Law School"
|
| 24 |
+
GRAD_SCHOOL = "Graduate School"
|
| 25 |
+
PHD = "PhD"
|
| 26 |
+
OTHER = "Other"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
_SCHOOL_TYPE_MAPPING: dict[str, SchoolType] = {
|
| 30 |
+
"PRIMARY_SECONDARY": SchoolType.PRIMARY_SECONDARY,
|
| 31 |
+
"UNDERGRAD_INCOMPLETE": SchoolType.UNDERGRAD_INCOMPLETE,
|
| 32 |
+
"UNDERGRAD_COMPLETED": SchoolType.UNDERGRAD_COMPLETED,
|
| 33 |
+
"MBA": SchoolType.MBA,
|
| 34 |
+
"LAW_SCHOOL": SchoolType.LAW_SCHOOL,
|
| 35 |
+
"GRAD_SCHOOL": SchoolType.GRAD_SCHOOL,
|
| 36 |
+
"PHD": SchoolType.PHD,
|
| 37 |
+
"OTHER": SchoolType.OTHER,
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class EducationClassification(BaseModel):
    """
    Pydantic model representing the classification result for an education item.

    Produced by EducationClassifier._parse_output from the LLM's
    "output / confidence / reasoning" response lines.

    Attributes:
        output (SchoolType): The classified school type.
        confidence (float): Confidence level of the classification, between 0.0 and 1.0.
        reasoning (str): Explanation for the classification decision.
    """

    output: SchoolType = Field(description="The classified school type")
    # NOTE(review): the 0.0-1.0 range is documented but not enforced; consider
    # Field(ge=0.0, le=1.0) if strict validation is wanted.
    confidence: float = Field(description="Confidence level between 0.0 and 1.0")
    reasoning: str = Field(description="Explanation for the classification")
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class EducationClassifier:
    """
    A class for classifying education items from LinkedIn profiles.

    This classifier uses a language model to determine the type of educational
    institution and program based on the information provided in a LinkedIn profile.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template for generating prompts for the language model.
        _prompt_loader (PromptLoader): The loader for prompt templates.
    """

    # Keys the prompt instructs the model to emit, one per line.
    _EXPECTED_KEYS = ("output", "confidence", "reasoning")

    @staticmethod
    def _parse_output(output: str) -> EducationClassification:
        """
        Parse the output from the language model into an EducationClassification object.

        The model is instructed to reply with "output:", "confidence:" and
        "reasoning:" lines.  Lines that do not introduce one of the expected
        keys are treated as continuations of the current value, so a
        multi-line reasoning paragraph no longer breaks parsing (the previous
        implementation raised ValueError on any line lacking a colon).

        Args:
            output (str): The raw output string from the language model.

        Returns:
            EducationClassification: A structured representation of the classification result.

        Raises:
            ValueError: If the output contains an unknown school type.
            KeyError: If one of the expected keys is missing from the output.
        """
        parsed: dict[str, str] = {}
        current_key: str | None = None
        for line in output.strip().split("\n"):
            key, sep, value = line.partition(":")
            normalized = key.strip().lower()
            if sep and normalized in EducationClassifier._EXPECTED_KEYS:
                current_key = normalized
                parsed[current_key] = value.strip()
            elif current_key is not None:
                # Continuation line, e.g. a reasoning paragraph that wraps.
                parsed[current_key] = f"{parsed[current_key]}\n{line.strip()}"
            # Lines before the first recognised key are ignored.

        school_type = parsed["output"].upper()
        if school_type not in _SCHOOL_TYPE_MAPPING:
            raise ValueError(f"Unknown school type: {parsed['output']}")
        return EducationClassification(
            output=_SCHOOL_TYPE_MAPPING[school_type],
            confidence=float(parsed["confidence"]),
            reasoning=parsed["reasoning"],
        )

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ):
        """
        Initialize the EducationClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template("education_classifier/1 - education_classifier")
        self._prompt_loader = prompt_loader

    async def classify_education(
        self, linkedin_profile: LinkedinProfile, education: Education
    ) -> EducationClassification:
        """
        Classify a single education item from a LinkedIn profile.

        This method prepares the input for the language model, sends the query,
        and processes the result to classify the education item.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            education (Education): The specific education item to classify.

        Returns:
            EducationClassification: The classification result for the education item.

        Raises:
            ValueError: If the prompt evaluation fails to produce a result.
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=EducationClassifier._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            education=format_education(education),
        )
        return await prompt.evaluate()  # type: ignore
|
template/src/template/app/model/linkedin/linkedin_formatters.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import xml.dom.minidom as minidom
|
| 2 |
+
import xml.etree.ElementTree as ET
|
| 3 |
+
from typing import Final, List
|
| 4 |
+
|
| 5 |
+
from template.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile, Position
|
| 6 |
+
|
| 7 |
+
# English month names, indexed by (month number - 1); used when rendering
# DateComponent values as human-readable dates.
_MONTHS: Final[list[str]] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _format_date_component(date_component: DateComponent | None) -> str:
    """
    Render a DateComponent as human-readable text.

    Emits the most specific form the available fields allow
    ("June 3rd, 2020", "June 2020", or "2020"); anything else — None, a
    non-DateComponent value, or a component with no year — renders as
    "Present".

    Args:
        date_component (DateComponent | None): The date to format.

    Returns:
        str: The formatted date string.
    """

    def _ordinal(day: int) -> str:
        # 11th-13th are irregular; otherwise the suffix follows the last digit.
        if 11 <= day <= 13:
            return f"{day}th"
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")
        return f"{day}{suffix}"

    if not isinstance(date_component, DateComponent):
        return "Present"
    year, month, day = date_component.year, date_component.month, date_component.day
    if not year:
        return "Present"
    if month and day:
        return f"{_MONTHS[month - 1]} {_ordinal(day)}, {year}"
    if month:
        return f"{_MONTHS[month - 1]} {year}"
    return str(year)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def format_education_human(education: List[Education]) -> str:
    """
    Render a list of Education items as a human-readable resume section.

    Args:
        education (List[Education]): A list of Education objects representing the education information.

    Returns:
        str: The formatted education section, or an empty string if there is
        nothing to render.
    """
    if not education:
        return ""
    parts = ["Education\n---------\n"]
    for item in education:
        started = _format_date_component(item.start)
        ended = _format_date_component(item.end)
        parts.append(f"{item.school_name} - {item.degree or ''}, {item.field_of_study or ''}\n")
        # A "Present" start means no usable start date, so skip the period line.
        if started != "Present":
            parts.append(f"Period: {started} to {ended}\n")
        if item.description:
            parts.append(f"Description:\n{item.description}\n")
        parts.append("\n")
    return "".join(parts)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _format_work_experience_human(positions: List[Position]) -> str:
    """
    Render a list of Position items as a human-readable resume section.

    Args:
        positions (List[Position]): A list of Position objects representing work experience.

    Returns:
        str: The formatted work-experience section, or an empty string if
        there is nothing to render.
    """
    if not positions:
        return ""
    parts = ["Work Experience\n---------------\n"]
    for job in positions:
        parts.append(f"{job.title} at {job.company_name}\n")
        parts.append(f"Period: {_format_date_component(job.start)} to {_format_date_component(job.end)}\n")
        if job.location:
            parts.append(f"Location: {job.location}\n")
        if job.description:
            parts.append(f"Description:\n{job.description}\n")
        parts.append("\n")
    return "".join(parts)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def format_profile_as_resume(profile: LinkedinProfile) -> str:
    """
    Format a LinkedIn profile as a plain-text resume.

    The result is the human-readable "Education" section followed by the
    "Work Experience" section, separated by a blank line.

    Args:
        profile (LinkedinProfile): The LinkedIn profile to be formatted.

    Returns:
        str: The formatted resume as a string.

    Example output:
        Education
        ---------
        Rice University - Master's Degree, Computer Science
        Period: 2005 to 2006

        Rice University - Bachelor's Degree, Computer Science, Economics
        Period: 2001 to 2005


        Work Experience
        ---------------
        Technical Product Manager at Airbnb.com
        Period: May 2011 to June 2012
        Description:
        A community marketplace for unique spaces.

        CEO & Founder at UniversityTutor.com
        Period: August 2003 to May 2012
    """
    sections = [
        format_education_human(profile.educations),
        _format_work_experience_human(profile.positions),
    ]
    return "\n".join(sections)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def format_profile_for_llm(profile: LinkedinProfile) -> str:
    """
    Format a LinkedIn profile for input to the LLM using XML-like tags.

    Each education and work-experience entry is wrapped in its own tag pair,
    entries within a section are separated by "--" lines, and the two
    sections by a "----" line (which also terminates the output).

    Args:
        profile (LinkedinProfile): The LinkedIn profile to be formatted.

    Returns:
        str: The formatted profile as a string.
    """
    education_entries = []
    for edu in profile.educations:
        education_entries.append(
            "<Education>\n"
            f"<Period>{_format_date_component(edu.start)} to {_format_date_component(edu.end)}</Period>\n"
            f"<School>{edu.school_name}</School>\n"
            f"<Degree>{edu.degree or ''}</Degree>\n"
            f"<FieldOfStudy>{edu.field_of_study or ''}</FieldOfStudy>\n"
            "</Education>"
        )

    position_entries = []
    for pos in profile.positions:
        position_entries.append(
            "<WorkExperience>\n"
            f"<Period>{_format_date_component(pos.start)} to {_format_date_component(pos.end)}</Period>\n"
            f"<Title>{pos.title}</Title>\n"
            f"<Company>{pos.company_name}</Company>\n"
            "</WorkExperience>"
        )

    education = "\n--\n".join(education_entries)
    work_experience = "\n--\n".join(position_entries)
    return "\n----\n".join([education, work_experience]) + "\n----"
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def _format_model_as_xml(model, root_tag: str) -> str:
    """
    Serialize a pydantic model's non-None fields as pretty-printed XML.

    Shared implementation behind format_education and format_position, which
    were previously byte-for-byte duplicates differing only in the root tag.

    Args:
        model: Any object exposing pydantic's model_dump() -> dict.
        root_tag (str): Name of the XML root element.

    Returns:
        str: Pretty-printed XML without the XML declaration line.
    """
    root = ET.Element(root_tag)

    for attr, value in model.model_dump().items():
        if value is None:
            continue
        if attr in ("start", "end"):
            # Date fields dump as nested dicts; emit each set component
            # (year/month/day) as its own child element.
            date_element = ET.SubElement(root, attr)
            for date_attr, date_value in value.items():
                if date_value is not None:
                    ET.SubElement(date_element, date_attr).text = str(date_value)
        else:
            ET.SubElement(root, attr).text = str(value)

    xml_string = ET.tostring(root, encoding="unicode")
    pretty_xml = minidom.parseString(xml_string).toprettyxml(indent=" ")
    # Drop the "<?xml ...?>" declaration line that minidom prepends.
    return "\n".join(pretty_xml.split("\n")[1:]).strip()


def format_education(education: "Education") -> str:
    """Format a single Education item as pretty-printed XML for LLM prompts."""
    return _format_model_as_xml(education, "Education")


def format_position(position: "Position") -> str:
    """Format a single Position item as pretty-printed XML for LLM prompts."""
    return _format_model_as_xml(position, "Position")
|
template/src/template/app/model/linkedin/linkedin_models.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, List
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
| 4 |
+
from pydantic.alias_generators import to_camel
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class BaseSchema(BaseModel):
    """
    Base model for all LinkedIn schema types.

    Generates camelCase field aliases (matching the API payload convention)
    while still allowing population by the snake_case field names, and
    enables construction from arbitrary attribute-bearing objects.
    """

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class DateComponent(BaseSchema):
    """
    Represents a date component with year, month, and day.

    Attributes:
        year (int | None): The year component of the date.
        month (int | None): The month component of the date. Defaults to None.
        day (int | None): The day component of the date. Defaults to None.
    """

    year: int | None = None
    month: int | None = None
    day: int | None = None

    @field_validator("year", "month", "day", mode="before")
    @classmethod
    def zero_to_none(cls, v: int | None) -> int | None:
        # Normalise 0 to None — the source data appears to use 0 for
        # "not set" (TODO confirm against the upstream API).
        return None if v == 0 else v
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class StartEndMixin(BaseSchema):
    """
    A mixin class for including start and end dates in other Pydantic models.

    This class is designed to be inherited by other classes that require
    start and end date attributes.
    """

    start: DateComponent | None = None
    end: DateComponent | None = None

    @field_validator("start", "end", mode="before")
    @classmethod
    def validate_date(cls, v: Any) -> Any:
        """
        Normalise empty or year-less dates to None.

        Because this runs with mode="before", ``v`` may be a raw dict (when
        validating a JSON payload) rather than a DateComponent instance, so
        both shapes are handled here; the previous implementation assumed an
        object with a ``.year`` attribute and raised AttributeError on dict
        input.
        """
        if not v:
            return None
        year = v.get("year") if isinstance(v, dict) else getattr(v, "year", None)
        if year is None:
            return None
        return v
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class Geo(BaseSchema):
    # Geographic location of a profile: country, city, and the full
    # display label as provided by the source data.
    country: str | None = None
    city: str | None = None
    full: str | None = None


class Language(BaseSchema):
    # A language listed on a profile, with an optional proficiency string.
    name: str | None = None
    proficiency: str | None = None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class Education(StartEndMixin):
    # One education entry from a LinkedIn profile.  Inherits start/end
    # dates from StartEndMixin; every field is optional in the source data.
    field_of_study: str | None = None
    degree: str | None = None
    grade: str | None = None
    school_name: str | None = None
    description: str | None = None
    activities: str | None = None


class Position(StartEndMixin):
    # One work-experience entry from a LinkedIn profile.  Inherits
    # start/end dates from StartEndMixin.
    company_name: str | None = None
    company_username: str | None = None
    company_url: str | None = None
    company_industry: str | None = None
    company_staff_count_range: str | None = None
    title: str | None = None
    location: str | None = None
    description: str | None = None
    employment_type: str | None = None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class Skill(BaseSchema):
    # A single named skill on a profile.
    name: str | None = None


class Course(BaseSchema):
    # A course entry (name plus optional course number).
    name: str | None = None
    number: str | None = None


class CertificationCompany(BaseSchema):
    # The issuing organisation of a certification.
    name: str | None = None
    universal_name: str | None = None
    logo: str | None = None


class Certification(StartEndMixin):
    # A certification entry.
    # NOTE(review): this inherits start/end from StartEndMixin AND declares a
    # separate time_period — possibly redundant; confirm which one the source
    # payload actually populates.
    name: str | None = None
    authority: str | None = None
    company: CertificationCompany | None = None
    time_period: StartEndMixin | None = None
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class LinkedinProfile(BaseSchema):
    """
    Represents a comprehensive profile, encompassing various sections such as articles,
    accomplishments, experiences, education, certifications, courses, test scores,
    and more personal and professional details.
    """

    first_name: str | None = None
    last_name: str | None = None
    is_open_to_work: bool | None = None
    is_hiring: bool | None = None
    profile_picture: str | None = None
    summary: str | None = None
    headline: str | None = None
    geo: Geo | None = None
    languages: List[Language] | None = []
    educations: List[Education] = []
    # Both fields below alias the same payload key "position".
    # NOTE(review): full_positions aliasing "position" (not "fullPositions")
    # looks unintentional — it makes full_positions always identical to
    # positions, defeating the fix-up in profile_from_json. Confirm against
    # the RapidAPI response shape.
    positions: List[Position] = Field(default=[], alias="position")
    full_positions: List[Position] = Field(default=[], alias="position")
    skills: List[Skill] | None = []
    courses: List[Course] | None = []
    certifications: List[Certification] | None = []
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def profile_from_json(json: dict[str, Any]) -> LinkedinProfile:
    """
    Create a Profile instance from the given JSON data.

    :param json: The JSON data to create a Profile instance from.
    :return: A Profile instance created from the given JSON data.
    """
    profile = LinkedinProfile.model_validate(json)
    # NOTE(review): both fields default to [] (never None), so this condition
    # is always true as written; it presumably intends "both non-empty".
    if (
        profile.full_positions is not None and profile.positions is not None
    ):  # Fixing a RapidAPI thing where the positions may be incomplete, and we want to use the full_positions
        profile.positions = profile.full_positions
    return profile
|
template/src/template/app/prompts/__init__.py
ADDED
|
File without changes
|
template/src/template/app/prompts/education_classifier/1 - education_classifier_human.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please classify the following educational item based on the job candidate's full resume and the specific LinkedIn education information provided. Analyze both sources of information carefully to determine the most accurate classification.
|
| 2 |
+
|
| 3 |
+
Full Resume:
|
| 4 |
+
{resume}
|
| 5 |
+
|
| 6 |
+
Specific LinkedIn Education Item:
|
| 7 |
+
{education}
|
| 8 |
+
|
| 9 |
+
Provide your classification, confidence level (0.0 to 1.0), and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the LinkedIn education item that support your decision.
|
template/src/template/app/prompts/education_classifier/1 - education_classifier_system.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in analyzing educational backgrounds and classifying them according to specific categories. Your task is to examine a job candidate's full resume and a specific educational item from LinkedIn, then classify the educational item into one of the predefined categories. Use the provided information carefully to make an accurate classification.
|
| 2 |
+
|
| 3 |
+
The classification categories are:
|
| 4 |
+
1. PRIMARY_SECONDARY: "Primary / Secondary School"
|
| 5 |
+
2. UNDERGRAD_INCOMPLETE: "Undergraduate (Incomplete)" (Includes things like study abroad)
|
| 6 |
+
3. UNDERGRAD_COMPLETED: "Undergraduate (Completed)"
|
| 7 |
+
4. MBA: "MBA"
|
| 8 |
+
5. LAW_SCHOOL: "Law School"
|
| 9 |
+
6. GRAD_SCHOOL: "Graduate School"
|
| 10 |
+
7. PHD: "PhD"
|
| 11 |
+
8. OTHER: "Other"
|
| 12 |
+
|
| 13 |
+
Pay close attention to the degree type, field of study, and any other relevant information provided in both the resume and the specific LinkedIn education item. Consider the context of the entire educational background when making your classification.
|
| 14 |
+
|
| 15 |
+
Provide your response in the following format:
|
| 16 |
+
|
| 17 |
+
output: [CATEGORY_NAME]
|
| 18 |
+
confidence: [0.0 to 1.0]
|
| 19 |
+
reasoning: [Your explanation here]
|
| 20 |
+
|
| 21 |
+
Ensure each part of your response is on a separate line, exactly as shown above.
|
template/src/template/app/prompts/prompt_loader.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
prompt_loader.py
|
| 3 |
+
|
| 4 |
+
This module provides a flexible and customizable system for loading and managing prompt templates
|
| 5 |
+
for use with language models. It allows for organization of prompts into families, loading of
|
| 6 |
+
specific prompt templates, and creation of customized Prompt objects.
|
| 7 |
+
|
| 8 |
+
Key components:
|
| 9 |
+
- PromptTemplate: A dataclass representing the content of a prompt template.
|
| 10 |
+
- PromptLoader: A class for loading prompt templates and creating Prompt objects.
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
1. Organize prompt files in directories by family, with naming convention:
|
| 14 |
+
"<number> - <name>_<type>.txt"
|
| 15 |
+
Example: "1 - education_classifier_system.txt"
|
| 16 |
+
2. Use PromptLoader to load templates and create Prompt objects.
|
| 17 |
+
3. Use created Prompt objects individually or combine them into a PromptChain.
|
| 18 |
+
|
| 19 |
+
This system allows for easy management, versioning, and customization of prompts for
|
| 20 |
+
various language model tasks.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Any, Callable
|
| 25 |
+
|
| 26 |
+
from template.llm.llm_service import LLMService
|
| 27 |
+
from template.llm.prompt import Prompt
|
| 28 |
+
from template.llm.prompt_template import PromptTemplate
|
| 29 |
+
from template.llm.prompt_text import PromptText
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class PromptLoader:
|
| 33 |
+
"""
|
| 34 |
+
A class for loading prompt templates and creating Prompt objects.
|
| 35 |
+
|
| 36 |
+
This class provides methods to load prompt templates from files organized in a specific
|
| 37 |
+
directory structure, and to create customized Prompt objects from these templates.
|
| 38 |
+
|
| 39 |
+
Attributes:
|
| 40 |
+
base_path (Path): The base directory path where prompt family directories are located.
|
| 41 |
+
|
| 42 |
+
Example::
|
| 43 |
+
|
| 44 |
+
# This example demonstrates how to use the PromptLoader class to load templates
|
| 45 |
+
# and create customized Prompt objects for different tasks.
|
| 46 |
+
|
| 47 |
+
# Set up the PromptLoader
|
| 48 |
+
loader = PromptLoader(Path("path/to/prompts"))
|
| 49 |
+
|
| 50 |
+
# Load and create the education classifier prompt
|
| 51 |
+
edu_template = loader.load_template("education_classifier/1 - education_classifier")
|
| 52 |
+
edu_prompt = loader.create_prompt(
|
| 53 |
+
edu_template,
|
| 54 |
+
llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_MINI),
|
| 55 |
+
max_tokens=500,
|
| 56 |
+
temperature=0.7,
|
| 57 |
+
output_formatter=lambda x: {"classification": x},
|
| 58 |
+
resume="Full resume text...",
|
| 59 |
+
education="Specific LinkedIn education item..."
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Load and create the narrative generator prompt
|
| 63 |
+
narrative_template = loader.load_template("narrative_generator/2 - narrative_generator")
|
| 64 |
+
narrative_prompt = loader.create_prompt(
|
| 65 |
+
narrative_template,
|
| 66 |
+
llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_O), # Using a different model
|
| 67 |
+
max_tokens=1000,
|
| 68 |
+
temperature=0.9,
|
| 69 |
+
output_formatter=lambda x: {"narrative": x},
|
| 70 |
+
education_classification=edu_prompt.evaluate()["classification"], # Using output from previous prompt
|
| 71 |
+
additional_context="Any other relevant information..."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
+
# These prompts can then be used individually or combined into a PromptChain
|
| 75 |
+
# for more complex workflows.
|
| 76 |
+
"""
|
| 77 |
+
|
| 78 |
+
def __init__(self, base_path: Path = Path(__file__).parent):
|
| 79 |
+
"""
|
| 80 |
+
Initialize the PromptLoader with a base path.
|
| 81 |
+
|
| 82 |
+
Args:
|
| 83 |
+
base_path (Path): The base directory path where prompt family directories are located.
|
| 84 |
+
"""
|
| 85 |
+
self.base_path = base_path
|
| 86 |
+
|
| 87 |
+
def load_template(self, full_name: str) -> PromptTemplate:
|
| 88 |
+
"""
|
| 89 |
+
Load a prompt template from files based on the provided full name.
|
| 90 |
+
|
| 91 |
+
This method searches for and loads prompt files matching the given name within
|
| 92 |
+
the specified family directory. It expects files to follow the naming convention:
|
| 93 |
+
"<number> - <name>_<type>.txt"
|
| 94 |
+
|
| 95 |
+
Args:
|
| 96 |
+
full_name (str): The full name of the prompt template in the format "family/name".
|
| 97 |
+
Example: "education_classifier/1 - education_classifier"
|
| 98 |
+
|
| 99 |
+
Returns:
|
| 100 |
+
PromptTemplate: A PromptTemplate object containing the loaded prompt contents.
|
| 101 |
+
|
| 102 |
+
Raises:
|
| 103 |
+
ValueError: If the full_name format is invalid or no valid prompt files are found.
|
| 104 |
+
FileNotFoundError: If the specified prompt family directory doesn't exist.
|
| 105 |
+
"""
|
| 106 |
+
parts = full_name.split("/")
|
| 107 |
+
if len(parts) != 2:
|
| 108 |
+
raise ValueError(f"Invalid prompt name format. Expected 'family/name', got '{full_name}'")
|
| 109 |
+
|
| 110 |
+
family, name = parts
|
| 111 |
+
prompt_dir = self.base_path / family
|
| 112 |
+
|
| 113 |
+
if not prompt_dir.is_dir():
|
| 114 |
+
raise FileNotFoundError(f"Prompt family directory not found: {prompt_dir}")
|
| 115 |
+
|
| 116 |
+
template = PromptTemplate(name)
|
| 117 |
+
prefix = name.split(" - ")[-1] # Get the part after "1 - " or similar
|
| 118 |
+
|
| 119 |
+
for file in prompt_dir.glob(f"*{prefix}*.txt"):
|
| 120 |
+
content = file.read_text().strip()
|
| 121 |
+
if "system" in file.name:
|
| 122 |
+
template.system_prompt = content
|
| 123 |
+
elif "human" in file.name or "user" in file.name:
|
| 124 |
+
template.user_prompt = content
|
| 125 |
+
elif "assistant" in file.name:
|
| 126 |
+
template.partial_assistant_prompt = content
|
| 127 |
+
|
| 128 |
+
if not template.system_prompt and not template.user_prompt:
|
| 129 |
+
raise ValueError(f"No valid prompt files found for {full_name}")
|
| 130 |
+
|
| 131 |
+
return template
|
| 132 |
+
|
| 133 |
+
def create_prompt(
|
| 134 |
+
self,
|
| 135 |
+
template: PromptTemplate,
|
| 136 |
+
llm_service: LLMService,
|
| 137 |
+
max_tokens: int = 1000,
|
| 138 |
+
temperature: float = 0.0,
|
| 139 |
+
output_formatter: Callable[[str], Any] = lambda x: x,
|
| 140 |
+
**kwargs: Any,
|
| 141 |
+
) -> Prompt:
|
| 142 |
+
"""
|
| 143 |
+
Create a Prompt object from a PromptTemplate with custom parameters.
|
| 144 |
+
|
| 145 |
+
This method allows for the creation of a fully configured Prompt object,
|
| 146 |
+
ready for use with a language model service. It applies the provided
|
| 147 |
+
parameters and input variables to the template.
|
| 148 |
+
|
| 149 |
+
Args:
|
| 150 |
+
template (PromptTemplate): The prompt template to use as a base.
|
| 151 |
+
llm_service (LLMService): The language model service to use for this prompt.
|
| 152 |
+
max_tokens (int): Maximum number of tokens for the model response. Defaults to 1000.
|
| 153 |
+
temperature (float): Sampling temperature for the model. Defaults to 0.0.
|
| 154 |
+
output_formatter (Callable[[str], Any] | None): A function to format the model's output.
|
| 155 |
+
If None, the output is returned as-is.
|
| 156 |
+
**kwargs: Additional keyword arguments to be used as input variables in the prompt texts.
|
| 157 |
+
|
| 158 |
+
Returns:
|
| 159 |
+
Prompt: A fully configured Prompt object ready for evaluation.
|
| 160 |
+
"""
|
| 161 |
+
system_prompt = PromptText(template.system_prompt, inputs=kwargs) if template.system_prompt else None
|
| 162 |
+
user_prompt = PromptText(template.user_prompt, inputs=kwargs) if template.user_prompt else None
|
| 163 |
+
partial_assistant_prompt = (
|
| 164 |
+
PromptText(template.partial_assistant_prompt, inputs=kwargs) if template.partial_assistant_prompt else None
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
return Prompt(
|
| 168 |
+
llm_service=llm_service,
|
| 169 |
+
system_prompt=system_prompt,
|
| 170 |
+
user_prompt=user_prompt,
|
| 171 |
+
partial_assistant_prompt=partial_assistant_prompt,
|
| 172 |
+
max_tokens=max_tokens,
|
| 173 |
+
temperature=temperature,
|
| 174 |
+
output_formatter=output_formatter,
|
| 175 |
+
)
|
template/src/template/app/prompts/work_experience_classifier/1 - work_experience_classifier_human.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please classify the following work experience item based on the job candidate's full resume and the specific work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for both Primary Job Type and Secondary Job Type.
|
| 2 |
+
|
| 3 |
+
Full Resume:
|
| 4 |
+
{resume}
|
| 5 |
+
|
| 6 |
+
Specific Work Experience Item:
|
| 7 |
+
{work_experience}
|
| 8 |
+
|
| 9 |
+
Provide your classification for both Primary Job Type and Secondary Job Type, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from both the resume and the work experience item that support your decision.
|
template/src/template/app/prompts/work_experience_classifier/1 - work_experience_classifier_system.txt
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in analyzing professional work experiences and classifying them according to specific categories. Your task is to examine a job candidate's full resume and a specific work experience item from their LinkedIn profile, then classify the work experience into two categories: Primary Job Type and Secondary Job Type.
|
| 2 |
+
|
| 3 |
+
Use the provided information carefully to make accurate classifications. Pay close attention to job titles, responsibilities, durations, and any other relevant information provided in both the resume and the specific work experience item.
|
| 4 |
+
|
| 5 |
+
Primary Job Type categories:
|
| 6 |
+
1. FULL_TIME: Regular, ongoing employment
|
| 7 |
+
2. ADVISORY_BOARD_INVESTOR: Advisory roles, board memberships, or independent investing activities
|
| 8 |
+
3. INTERNSHIP: Short-term positions for students or recent graduates, including summer/seasonal analyst roles
|
| 9 |
+
4. EXTRACURRICULAR: Non-professional activities related to career development
|
| 10 |
+
5. EDUCATION: When educational experiences are listed as work experiences
|
| 11 |
+
6. OTHER: Volunteer work or any other type that doesn't fit the above categories
|
| 12 |
+
|
| 13 |
+
Secondary Job Type categories:
|
| 14 |
+
1. INVESTING: Professional investing roles, such as venture capital, private equity, or hedge fund positions
|
| 15 |
+
2. BACK_OFFICE: Supporting roles in financial firms, such as operations, accounting, or IT
|
| 16 |
+
3. INVESTMENT_BANKING: Roles in investment banks or related financial services
|
| 17 |
+
4. CONSULTING: Management or strategy consulting roles
|
| 18 |
+
5. ENGINEERING: Software development, hardware engineering, or other technical roles
|
| 19 |
+
6. ENTREPRENEUR_FOUNDER: Founding or co-founding a company
|
| 20 |
+
7. CORPDEV_STRATEGY: Corporate development or strategic planning roles
|
| 21 |
+
8. OTHER: Any role that doesn't fit the above categories
|
| 22 |
+
|
| 23 |
+
Provide your response in the following format exactly:
|
| 24 |
+
|
| 25 |
+
primary_job_type: [PRIMARY_JOB_TYPE]
|
| 26 |
+
secondary_job_type: [SECONDARY_JOB_TYPE]
|
| 27 |
+
confidence: [0.0 to 1.0]
|
| 28 |
+
reasoning: [Your explanation here]
|
| 29 |
+
|
| 30 |
+
Ensure each part of your response is on a separate line, exactly as shown above. There should be only four lines.
|
| 31 |
+
|
| 32 |
+
The PRIMARY_JOB_TYPE must be one of: FULL_TIME, ADVISORY_BOARD_INVESTOR, INTERNSHIP, EXTRACURRICULAR, EDUCATION, OTHER
|
| 33 |
+
|
| 34 |
+
The SECONDARY_JOB_TYPE must be one of: INVESTING, BACK_OFFICE, INVESTMENT_BANKING, CONSULTING, ENGINEERING, ENTREPRENEUR_FOUNDER, CORPDEV_STRATEGY, OTHER
|
| 35 |
+
|
| 36 |
+
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 37 |
+
|
| 38 |
+
In your reasoning, briefly explain why you chose these classifications, referencing specific details from the resume and work experience item that support your decision.
|
template/src/template/app/work_experience_classifier.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
work_experience_classifier.py
|
| 3 |
+
|
| 4 |
+
This module provides functionality for classifying work experiences from LinkedIn profiles.
|
| 5 |
+
It uses a language model to determine the primary and secondary job types based on the
|
| 6 |
+
information provided in a LinkedIn profile and specific work experience.
|
| 7 |
+
|
| 8 |
+
Classes:
|
| 9 |
+
PrimaryJobType: Enum representing different primary job types.
|
| 10 |
+
SecondaryJobType: Enum representing different secondary job types.
|
| 11 |
+
WorkExperienceClassification: Pydantic model for work experience classification results.
|
| 12 |
+
WorkExperienceClassifier: Main class for classifying work experiences.
|
| 13 |
+
|
| 14 |
+
Usage:
|
| 15 |
+
classifier = WorkExperienceClassifier()
|
| 16 |
+
classification = await classifier.classify_work_experience(linkedin_profile, work_experience)
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from enum import Enum
|
| 20 |
+
from typing import Any, Final
|
| 21 |
+
|
| 22 |
+
from pydantic import BaseModel, Field
|
| 23 |
+
|
| 24 |
+
from template.app import bindings
|
| 25 |
+
from template.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
|
| 26 |
+
from template.app.model.linkedin.linkedin_models import LinkedinProfile, Position
|
| 27 |
+
from template.app.prompts.prompt_loader import PromptLoader
|
| 28 |
+
from template.llm.llm_service import LLMService
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class PrimaryJobType(str, Enum):
    """
    Top-level category assigned to a work-experience entry.

    Inherits from ``str`` so members compare equal to — and serialize as —
    their human-readable labels.
    """

    FULL_TIME = "Full-time"
    ADVISORY_BOARD_INVESTOR = "Advisory / Board / Independent Investor"
    INTERNSHIP = "Internship"
    EXTRACURRICULAR = "Extracurricular"
    EDUCATION = "Education"
    OTHER = "Other"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class SecondaryJobType(str, Enum):
    """
    Functional category assigned to a work-experience entry.

    Inherits from ``str`` so members compare equal to — and serialize as —
    their human-readable labels.
    """

    INVESTING = "Investing"
    BACK_OFFICE = "Back Office"
    INVESTMENT_BANKING = "Investment Banking"
    CONSULTING = "Consulting"
    ENGINEERING = "Engineering"
    ENTREPRENEUR_FOUNDER = "Entrepreneur / Founder"
    CORPDEV_STRATEGY = "CorpDev / Strategy"
    OTHER = "Other"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# Lookup tables from the UPPER_SNAKE member names emitted by the model
# (e.g. "FULL_TIME") back to the corresponding enum members.
_JOB_TYPE_MAPPINGS: Final[dict[str, dict[str, Enum]]] = {
    "primary": {member.name: member for member in PrimaryJobType},
    "secondary": {member.name: member for member in SecondaryJobType},
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class WorkExperienceClassification(BaseModel):
    """
    Pydantic model representing the classification result for a work experience item.

    Instances are typically built by ``WorkExperienceClassifier._parse_output``
    from the language model's text response.

    Attributes:
        primary_job_type (PrimaryJobType): The classified primary job type.
        secondary_job_type (SecondaryJobType): The classified secondary job type.
        confidence (float): Confidence level of the classification, between 0.0 and 1.0
            (range enforced by the Field ge/le constraints at construction time).
        reasoning (str): Explanation for the classification decision.
    """

    primary_job_type: PrimaryJobType = Field(description="The classified primary job type")
    secondary_job_type: SecondaryJobType = Field(description="The classified secondary job type")
    confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
    reasoning: str = Field(description="Explanation for the classification")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class WorkExperienceClassifier:
    """
    A class for classifying work experiences from LinkedIn profiles.

    This classifier uses a language model to determine the primary and secondary job types
    based on the information provided in a LinkedIn profile and specific work experience.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template for generating prompts for the language model.
        _prompt_loader (PromptLoader): The loader for prompt templates.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the WorkExperienceClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template("work_experience_classifier/1 - work_experience_classifier")
        self._prompt_loader = prompt_loader

    @staticmethod
    def _parse_output(output: str) -> WorkExperienceClassification:
        """
        Parse the output from the language model into a WorkExperienceClassification object.

        The expected format is one "key: value" pair per line for the keys
        primary_job_type, secondary_job_type, confidence, and reasoning.
        Lines that do not contain a colon (e.g. blank lines or extra chatter)
        are ignored rather than crashing the parse.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            WorkExperienceClassification: A structured representation of the classification result.

        Raises:
            ValueError: If required fields are missing, the output contains an
                unknown job type, or the confidence value is not a number.
        """
        parsed: dict[str, Any] = {}
        for line in output.strip().split("\n"):
            # partition() never raises; lines without a colon are skipped.
            key, sep, value = line.partition(":")
            if not sep:
                continue
            parsed[key.strip()] = value.strip()

        # Surface missing fields as a single clear ValueError instead of a raw
        # KeyError from the lookups below.
        missing = [k for k in ("primary_job_type", "secondary_job_type", "confidence", "reasoning") if k not in parsed]
        if missing:
            raise ValueError(f"Malformed classifier output, missing fields: {missing}")

        primary_job_type_str = parsed["primary_job_type"].upper()
        secondary_job_type_str = parsed["secondary_job_type"].upper()

        try:
            primary_job_type = _JOB_TYPE_MAPPINGS["primary"][primary_job_type_str]
            secondary_job_type = _JOB_TYPE_MAPPINGS["secondary"][secondary_job_type_str]
        except KeyError as e:
            raise ValueError(f"Unknown job type: {str(e)}") from e

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        # The mapping values are already the enum members, so no re-wrapping is needed.
        return WorkExperienceClassification(
            primary_job_type=primary_job_type,
            secondary_job_type=secondary_job_type,
            confidence=confidence,
            reasoning=parsed["reasoning"],
        )

    async def classify_work_experience(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> WorkExperienceClassification:
        """
        Classify a single work experience item from a LinkedIn profile.

        This method prepares the input for the language model, sends the query,
        and processes the result to classify the work experience item.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            WorkExperienceClassification: The classification result for the work experience item.

        Raises:
            ValueError: If the prompt evaluation fails to produce a result.
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
|
template/src/template/llm/__init__.py
ADDED
|
File without changes
|
template/src/template/llm/bedrock/__init__.py
ADDED
|
File without changes
|
template/src/template/llm/bedrock/bedrock.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import json
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
from typing import AsyncIterator
|
| 5 |
+
|
| 6 |
+
import aiohttp
|
| 7 |
+
from botocore.auth import SigV4Auth
|
| 8 |
+
from botocore.awsrequest import AWSRequest
|
| 9 |
+
|
| 10 |
+
from template.llm.bedrock.bedrock_model import AnthropicModel, BedrockModel, get_bedrock_model_rate_limit
|
| 11 |
+
from template.llm.bedrock.bedrock_rate_limiter import BedrockRateLimiter
|
| 12 |
+
from template.llm.llm_service import LLMService
|
| 13 |
+
from template.shared import aws_clients, config, logger_factory
|
| 14 |
+
|
| 15 |
+
logger = logger_factory.get_logger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AsyncBedrockService(LLMService):
    """
    An asynchronous service class for making calls to the AWS Bedrock API.

    This class handles authentication, rate limiting, and API interactions
    when using AWS Bedrock's language models. Use it as an async context
    manager factory: ``async with service() as svc: await svc.invoke(...)``.
    """

    def __init__(self, model: BedrockModel, max_concurrency: int = 10):
        """
        Initialize the AsyncBedrockService.

        Args:
            model (BedrockModel): The Bedrock model to use for API calls.
            max_concurrency (int): Maximum number of concurrent API calls. Defaults to 10.
        """
        bedrock_client = aws_clients.get_bedrock_client()
        self._session: aiohttp.ClientSession | None = None
        self._credentials = bedrock_client._get_credentials()
        self._region = config.get_aws_region()
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._model = model
        # Keep a 10-requests/minute safety margin under the published limit,
        # but never drop below 1 request/minute.
        self._rate_limiter = BedrockRateLimiter(
            rate=max(get_bedrock_model_rate_limit(model).requests_per_minute - 10, 1), per=60.0
        )

    @asynccontextmanager
    async def __call__(self) -> AsyncIterator["AsyncBedrockService"]:
        """Open an aiohttp session for the duration of the context and close it on exit."""
        self._session = aiohttp.ClientSession()
        try:
            yield self
        finally:
            if self._session:
                await self._session.close()

    async def invoke(
        self,
        user_prompt: str | None = None,
        system_prompt: str | None = None,
        partial_assistant_prompt: str | None = None,
        max_tokens: int = 1000,
        temperature: float = 0.0,
    ) -> str | None:
        """
        Invoke the Bedrock API with the given prompts and parameters.

        This method handles rate limiting and makes the API call.

        Args:
            user_prompt (str | None): The main prompt from the user.
            system_prompt (str | None): A system message to set the context.
            partial_assistant_prompt (str | None): A partial response from the assistant;
                it is prepended to the model's continuation in the returned text.
            max_tokens (int): Maximum number of tokens in the response.
            temperature (float): Sampling temperature for response generation.

        Returns:
            str | None: The generated response from the Bedrock API, or None if no response.

        Raises:
            ValueError: If the model is not an Anthropic model or if no prompts are provided.
            RuntimeError: If called outside the async context manager (no session).
            Exception: For any errors encountered during the API call.
        """
        # Verify that the model is an Anthropic model
        if not isinstance(self._model, AnthropicModel):
            raise ValueError(f"Model {self._model} is not an Anthropic model")

        # Verify that user prompt, system prompt, and partial assistant prompt are not all None
        if not any([user_prompt, system_prompt, partial_assistant_prompt]):
            raise ValueError("At least one of user_prompt, system_prompt, or partial_assistant_prompt must be provided")

        async with self._semaphore:  # Use semaphore to limit concurrency
            await self._rate_limiter.acquire()  # rate limit first
            url = f"https://bedrock-runtime.{self._region}.amazonaws.com/model/{self._model.value}/invoke"

            # Build the messages list unconditionally. The previous version only
            # assigned `messages` inside `if user_prompt:`, so a system-prompt-only
            # call crashed with UnboundLocalError, and a partial assistant prompt
            # was silently dropped unless a user prompt was also supplied.
            messages: list[dict[str, str]] = []
            if user_prompt:
                messages.append({"role": "user", "content": user_prompt})
            if partial_assistant_prompt:
                messages.append({"role": "assistant", "content": partial_assistant_prompt})

            body = {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": max_tokens,
                "messages": messages,
                "temperature": temperature,
            }

            if system_prompt:
                body["system"] = system_prompt

            body_json = json.dumps(body)

            headers = {
                "Content-Type": "application/json",
                "Accept": "application/json",
            }

            # Sign the request with SigV4 so Bedrock accepts it.
            request = AWSRequest(method="POST", url=url, data=body_json, headers=headers)
            SigV4Auth(self._credentials, "bedrock", self._region).add_auth(request)

            if self._session is None:
                raise RuntimeError("Session is not initialized")

            async with self._session.post(url, data=body_json, headers=dict(request.headers)) as response:
                logger.info("Bedrock API called", url=response.url)
                if response.status != 200:
                    raise Exception(f"Bedrock API error: {response.status} {await response.text()}")

                response_body = await response.json()

                if "content" not in response_body:
                    raise Exception(f"Content not found in Bedrock response: {response_body}")

                text = str(response_body["content"][0]["text"])
                # Return the full text, including the caller-provided prefix.
                if partial_assistant_prompt:
                    text = f"{partial_assistant_prompt}{text}"

                # Extract token usage information
                input_tokens = response_body.get("usage", {}).get("input_tokens", 0)
                output_tokens = response_body.get("usage", {}).get("output_tokens", 0)

                # Log token usage
                logger.info("Token usage", input_tokens=input_tokens, output_tokens=output_tokens)

                return text
|
template/src/template/llm/bedrock/bedrock_model.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from enum import Enum
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@dataclass
|
| 6 |
+
class RateLimit:
|
| 7 |
+
"""
|
| 8 |
+
Dataclass representing rate limit information for Bedrock models.
|
| 9 |
+
|
| 10 |
+
Attributes:
|
| 11 |
+
requests_per_minute (int): The maximum number of requests allowed per minute.
|
| 12 |
+
tokens_per_minute (int | None): The maximum number of tokens allowed per minute, if applicable.
|
| 13 |
+
regions (str): The AWS regions where this rate limit applies.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
requests_per_minute: int
|
| 17 |
+
tokens_per_minute: int | None
|
| 18 |
+
regions: str
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class BedrockModel(Enum):
    """
    Abstract base enum for Bedrock-hosted models.

    Concrete subclasses enumerate one provider's model identifiers and
    override :meth:`_rate_limits` with the per-model quota table.
    """

    @classmethod
    def get_rate_limit(cls, model: "BedrockModel") -> RateLimit:
        """
        Look up the rate limit for a specific Bedrock model.

        Args:
            model (BedrockModel): The model whose rate limit is requested.

        Returns:
            RateLimit: The rate limit information for the specified model.
        """
        return cls._rate_limits()[model]

    @classmethod
    def _rate_limits(cls) -> dict["BedrockModel", RateLimit]:
        """
        Return the per-model rate-limit table.

        Returns:
            dict[BedrockModel, RateLimit]: A dictionary mapping each model to its rate limit.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Subclasses must implement this method")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class AnthropicModel(BedrockModel):
    """
    Anthropic models available through Bedrock (various Claude versions).
    """

    CLAUDE_3_OPUS = "anthropic.claude-3-opus-20240229-v1:0"
    CLAUDE_3_5_SONNET = "anthropic.claude-3-5-sonnet-20240620-v1:0"
    CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"
    CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0"
    CLAUDE_INSTANT_1_2 = "anthropic.claude-instant-v1"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        # Several models share the same restricted-region description.
        east_west = "US East (N. Virginia) (us-east-1), US West (Oregon) (us-west-2)"
        return {
            cls.CLAUDE_3_OPUS: RateLimit(50, 400_000, "All"),
            cls.CLAUDE_3_5_SONNET: RateLimit(50, 400_000, "All"),
            cls.CLAUDE_3_SONNET: RateLimit(500, 1_000_000, east_west),
            cls.CLAUDE_3_HAIKU: RateLimit(1000, 2_000_000, east_west),
            cls.CLAUDE_INSTANT_1_2: RateLimit(1000, 1_000_000, east_west),
        }
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class MetaModel(BedrockModel):
    """
    Meta models available through Bedrock (Llama 2 and Llama 3 variants).
    """

    LLAMA_2_70B_CHAT = "meta.llama2-70b-chat-v1"
    LLAMA_2_13B_CHAT = "meta.llama2-13b-chat-v1"
    LLAMA_3_8B_INSTRUCT = "meta.llama3-8b-instruct-v1:0"
    LLAMA_3_70B_INSTRUCT = "meta.llama3-70b-instruct-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        # All Meta models share the same token quota; the 70B variants have
        # half the request quota of the smaller ones.
        return {
            cls.LLAMA_2_70B_CHAT: RateLimit(400, 300_000, "All"),
            cls.LLAMA_2_13B_CHAT: RateLimit(800, 300_000, "All"),
            cls.LLAMA_3_8B_INSTRUCT: RateLimit(800, 300_000, "All"),
            cls.LLAMA_3_70B_INSTRUCT: RateLimit(400, 300_000, "All"),
        }
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class MistralModel(BedrockModel):
    """
    Mistral models available through Bedrock.
    """

    MISTRAL_7B_INSTRUCT = "mistral.mistral-7b-instruct-v0:2"
    MIXTRAL_8X7B_INSTRUCT = "mistral.mixtral-8x7b-instruct-v0:1"
    MISTRAL_LARGE = "mistral.mistral-large-2402-v1:0"
    MISTRAL_SMALL = "mistral.mistral-small-2402-v1:0"

    @classmethod
    def _rate_limits(cls) -> dict[BedrockModel, RateLimit]:
        # Only the 7B instruct model gets the higher request quota.
        return {
            cls.MISTRAL_7B_INSTRUCT: RateLimit(800, 300_000, "All"),
            cls.MIXTRAL_8X7B_INSTRUCT: RateLimit(400, 300_000, "All"),
            cls.MISTRAL_LARGE: RateLimit(400, 300_000, "All"),
            cls.MISTRAL_SMALL: RateLimit(400, 300_000, "All"),
        }
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def get_bedrock_model_rate_limit(model: BedrockModel) -> RateLimit:
    """
    Look up the rate limit for any Bedrock model.

    Convenience wrapper that dispatches to ``get_rate_limit`` on the model's
    concrete subclass (AnthropicModel, MetaModel, MistralModel, ...).

    Args:
        model (BedrockModel): The Bedrock model to get the rate limit for.

    Returns:
        RateLimit: The rate limit information for the specified model.
    """
    return model.__class__.get_rate_limit(model)
|
template/src/template/llm/bedrock/bedrock_rate_limiter.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import math
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
from template.shared import logger_factory
|
| 6 |
+
|
| 7 |
+
logger = logger_factory.get_logger(__name__)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BedrockRateLimiter:
    """
    A rate limiter for AWS Bedrock API calls.

    Implements a token-bucket algorithm: the bucket refills continuously at
    ``rate`` tokens per ``per`` seconds and each ``acquire()`` consumes one
    token. When the bucket is empty, the caller is paused for 30 seconds.
    """

    def __init__(self, rate: int, per: float = 60.0):
        """
        Initialize the BedrockRateLimiter.

        Args:
            rate (int): The number of requests allowed per time period.
            per (float): The time period in seconds. Defaults to 60.0.
        """
        self.rate = rate
        self.per = per
        # Token balance is a float so refill can be fractional. The previous
        # integer math.ceil() refill granted at least one full token for ANY
        # elapsed time > 0, which effectively disabled the limiter for
        # sequential callers.
        self.allowance: float = float(rate)
        self.last_check = time.time()

    async def acquire(self) -> None:
        """
        Acquire permission to make an API call, respecting rate limits.

        Refills the bucket proportionally to the wall-clock time elapsed since
        the previous call, capped at ``rate``. If less than one full token is
        available, sleeps for 30 seconds and resets the bucket to empty.

        Raises:
            No specific exceptions are raised, but the method may cause the
            execution to sleep for up to 30 seconds if the rate limit is
            exceeded.
        """
        now = time.time()
        time_passed = now - self.last_check
        self.last_check = now
        # Fractional refill: tokens accrue at rate/per per elapsed second.
        self.allowance += time_passed * (self.rate / self.per)
        if self.allowance > self.rate:
            self.allowance = self.rate
        if self.allowance < 1:
            logger.info("Rate limit exceeded", sleep_time=30)
            await asyncio.sleep(30)
            # Restart from an empty bucket (matching the original behavior) so
            # a burst of queued callers cannot stampede the API after the wait.
            self.allowance = 0
        else:
            self.allowance -= 1
|
template/src/template/llm/llm_service.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class RateLimitError(Exception):
    """
    Raised when an LLM provider reports that an API rate limit was exceeded.

    Although the original wording referenced only the OpenAI API, this error
    lives in the provider-agnostic service module so any LLMService
    implementation can raise it and callers can retry on it uniformly.
    """

    pass
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class LLMService(ABC):
|
| 11 |
+
"""
|
| 12 |
+
Abstract base class for Language Model Services.
|
| 13 |
+
|
| 14 |
+
This class defines the interface for interacting with various language model services.
|
| 15 |
+
Concrete implementations should provide the specific logic for invoking the language model.
|
| 16 |
+
|
| 17 |
+
Methods:
|
| 18 |
+
invoke(user_prompt: str | None = None,
|
| 19 |
+
system_prompt: str | None = None,
|
| 20 |
+
partial_assistant_prompt: str | None = None,
|
| 21 |
+
max_tokens: int = 1000,
|
| 22 |
+
temperature: float = 0.0) -> str | None:
|
| 23 |
+
Abstract method to invoke the language model with given prompts and parameters.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
@abstractmethod
|
| 27 |
+
async def invoke(
|
| 28 |
+
self,
|
| 29 |
+
user_prompt: str | None = None,
|
| 30 |
+
system_prompt: str | None = None,
|
| 31 |
+
partial_assistant_prompt: str | None = None,
|
| 32 |
+
max_tokens: int = 1000,
|
| 33 |
+
temperature: float = 0.0,
|
| 34 |
+
) -> str | None:
|
| 35 |
+
"""
|
| 36 |
+
Invoke the language model with the given prompts and parameters.
|
| 37 |
+
|
| 38 |
+
Args:
|
| 39 |
+
user_prompt (str | None): The main prompt from the user.
|
| 40 |
+
system_prompt (str | None): A system message to set the context.
|
| 41 |
+
partial_assistant_prompt (str | None): A partial response from the assistant.
|
| 42 |
+
max_tokens (int): Maximum number of tokens in the response.
|
| 43 |
+
temperature (float): Sampling temperature for response generation.
|
| 44 |
+
|
| 45 |
+
Returns:
|
| 46 |
+
str | None: The generated response from the language model, or None if no response.
|
| 47 |
+
"""
|
| 48 |
+
pass
|
template/src/template/llm/openai/__init__.py
ADDED
|
File without changes
|
template/src/template/llm/openai/openai.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
openai.py
|
| 3 |
+
|
| 4 |
+
This module provides an asynchronous service for interacting with OpenAI's API.
|
| 5 |
+
It handles authentication, rate limiting, and retrying of requests in case of rate limit errors.
|
| 6 |
+
|
| 7 |
+
The module includes:
|
| 8 |
+
1. A custom RateLimitException for handling rate limit errors.
|
| 9 |
+
2. An AsyncOpenAIService class for making API calls to OpenAI.
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
async with AsyncOpenAIService(OpenAIModel.GPT_4_O) as service:
|
| 13 |
+
response = await service.invoke_openai(
|
| 14 |
+
user_prompt="Tell me a joke",
|
| 15 |
+
max_tokens=100,
|
| 16 |
+
temperature=0.7
|
| 17 |
+
)
|
| 18 |
+
print(response)
|
| 19 |
+
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import asyncio
|
| 23 |
+
from contextlib import asynccontextmanager
|
| 24 |
+
from typing import AsyncIterator
|
| 25 |
+
|
| 26 |
+
import openai
|
| 27 |
+
from openai import AsyncOpenAI
|
| 28 |
+
from openai.types.chat import ChatCompletion
|
| 29 |
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
|
| 30 |
+
|
| 31 |
+
from template.llm.llm_service import LLMService, RateLimitError
|
| 32 |
+
from template.llm.openai.openai_model import OpenAIModel, get_openai_model_rate_limit
|
| 33 |
+
from template.llm.openai.openai_rate_limiter import OpenAIRateLimiter
|
| 34 |
+
from template.shared import aws_clients, config, logger_factory
|
| 35 |
+
|
| 36 |
+
logger = logger_factory.get_logger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class AsyncOpenAIService(LLMService):
    """
    An asynchronous service class for making calls to the OpenAI API.

    This class handles authentication (API key from AWS Parameter Store),
    client-side rate limiting, bounded concurrency, and retrying of requests
    when interacting with OpenAI's language models.
    """

    def __init__(self, model: OpenAIModel, max_concurrency: int = 10):
        """
        Initialize the AsyncOpenAIService.

        Args:
            model (OpenAIModel): The OpenAI model to use for API calls.
            max_concurrency (int): Maximum number of concurrent API calls. Defaults to 10.
        """
        self._client = AsyncOpenAI(api_key=self._fetch_api_key())
        self._semaphore = asyncio.Semaphore(max_concurrency)
        self._model = model
        rate_limit = get_openai_model_rate_limit(model)
        # Run slightly below the published limits (95%) to leave headroom for
        # token-estimation error; the floors keep tiny limits usable.
        self._rate_limiter = OpenAIRateLimiter(
            initial_rate_requests=int(max(rate_limit.requests_per_minute * 0.95, 1)),
            initial_rate_tokens=int(max(rate_limit.tokens_per_minute * 0.95, 100)),
            per=60.0,
        )

    @staticmethod
    def _fetch_api_key() -> str:
        """
        Fetch the OpenAI API key from AWS Parameter Store.

        Returns:
            str: The OpenAI API key.

        Raises:
            ValueError: If the API key is not found in the Parameter Store.
            RuntimeError: If there's an error accessing the Parameter Store.
        """
        try:
            return aws_clients.fetch_from_parameter_store(config.get_openai_api_key_path(), is_secret=True)
        except aws_clients.ParameterNotFoundError as e:
            logger.error("API key not found in Parameter Store", error=str(e))
            raise ValueError("OpenAI API key not found") from e
        except aws_clients.ParameterStoreAccessError as e:
            logger.error("Error accessing Parameter Store", error=str(e))
            raise RuntimeError("Unable to access OpenAI API key") from e

    @asynccontextmanager
    async def __call__(self) -> AsyncIterator["AsyncOpenAIService"]:
        """Async context manager that closes the underlying HTTP client on exit."""
        try:
            yield self
        finally:
            await self._client.close()

    async def invoke(
        self,
        user_prompt: str | None = None,
        system_prompt: str | None = None,
        partial_assistant_prompt: str | None = None,
        max_tokens: int = 1000,
        temperature: float = 0.0,
    ) -> str | None:
        """
        Invoke the OpenAI API with the given prompts and parameters.

        This method enforces the concurrency cap and client-side rate limit,
        then makes the API call and post-processes the response.

        Args:
            user_prompt (str | None): The main prompt from the user.
            system_prompt (str | None): A system message to set the context.
            partial_assistant_prompt (str | None): A partial response from the assistant;
                it is prepended to the model's continuation in the return value.
            max_tokens (int): Maximum number of tokens in the response.
            temperature (float): Sampling temperature for response generation.

        Returns:
            str | None: The generated response from the OpenAI API, or None if no response.

        Raises:
            RateLimitError: If the API rate limit is exceeded.
            openai.APIError: For any other errors encountered during the API call.
        """
        async with self._semaphore:  # Use semaphore to limit concurrency
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            if user_prompt:
                messages.append({"role": "user", "content": user_prompt})
            if partial_assistant_prompt:
                messages.append({"role": "assistant", "content": partial_assistant_prompt})

            # Estimate token usage for rate limiting. Whitespace word count is
            # only a rough proxy; the real count comes back in response.usage.
            estimated_tokens = sum(len(m["content"].split()) for m in messages) + max_tokens

            await self._rate_limiter.acquire(estimated_tokens)

            response = await self.query_openai(max_tokens, temperature, messages)
            logger.info("OpenAI API called", model=self._model.value)

            message = response.choices[0].message
            if message.refusal:
                logger.error("OpenAI refusal message", refusal=message.refusal)
            # BUGFIX: check content BEFORE stringifying. The previous code did
            # `text = str(message.content)` first, which converted None into the
            # literal string "None" and made the subsequent None-check dead.
            if message.content is None:
                logger.warning("No message content from OpenAI API")
                return None
            text = str(message.content)

            if partial_assistant_prompt:
                text = f"{partial_assistant_prompt}{text}"

            # Extract token usage information
            usage = response.usage
            input_tokens = usage.prompt_tokens if usage else 0
            output_tokens = usage.completion_tokens if usage else 0

            # Log token usage
            logger.info("Token usage", input_tokens=input_tokens, output_tokens=output_tokens)

            return text

    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_exception_type(RateLimitError),
    )  # type: ignore
    async def query_openai(self, max_tokens: int, temperature: float, messages: list) -> ChatCompletion:
        """
        Make an API call to OpenAI with retry logic for rate limit errors.

        This method is decorated with a retry mechanism that will attempt to retry
        the call up to 6 times with exponential backoff if a RateLimitError is raised.

        Args:
            max_tokens (int): Maximum number of tokens in the response.
            temperature (float): Sampling temperature for response generation.
            messages (list): List of message dictionaries to send to the API.

        Returns:
            ChatCompletion: The parsed response from the OpenAI API.

        Raises:
            RateLimitError: If the API rate limit is exceeded after all retry attempts.
            openai.APIError: For any other errors encountered during the API call.
        """
        try:
            # with_raw_response exposes the HTTP headers so the limiter can
            # read the x-ratelimit-* values before parsing the body.
            response = await self._client.chat.completions.with_raw_response.create(
                model=self._model.value,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
            )

            # Update rate limit info based on response headers
            self._rate_limiter.update_rate_limit_info(response.headers)

            return response.parse()

        except openai.RateLimitError as e:
            logger.warning("Rate limit error encountered. Retrying...")
            raise RateLimitError("OpenAI API rate limit exceeded") from e
        except openai.APIError as e:
            logger.error("OpenAI API error", error=str(e))
            raise
|
template/src/template/llm/openai/openai_model.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
from enum import Enum
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@dataclass
class RateLimit:
    """
    Per-model API rate-limit quotas for OpenAI models.

    Attributes:
        requests_per_minute (int): Maximum number of requests allowed per minute.
        tokens_per_minute (int): Maximum number of tokens allowed per minute.
    """

    requests_per_minute: int
    tokens_per_minute: int
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class OpenAIModel(Enum):
    """
    OpenAI chat models supported by this project.

    Each member's value is the model identifier string passed to the OpenAI
    API. Rate limits for each member are exposed via ``get_rate_limit``.
    """

    GPT_4_O = "gpt-4o"
    GPT_4_MINI = "gpt-4o-mini"

    @classmethod
    def get_rate_limit(cls, model: "OpenAIModel") -> RateLimit:
        """
        Look up the rate limit for a specific OpenAI model.

        Args:
            model (OpenAIModel): The OpenAI model to get the rate limit for.

        Returns:
            RateLimit: The rate limit information for the specified model.
        """
        limits = cls._rate_limits()
        return limits[model]

    @classmethod
    def _rate_limits(cls) -> dict[Enum, RateLimit]:
        """
        Build the model-to-rate-limit table.

        Returns:
            dict[Enum, RateLimit]: A dictionary mapping each model to its rate limit.
        """
        # The mini model has the same request budget but twice the token budget.
        return {
            cls.GPT_4_O: RateLimit(500, 30_000),
            cls.GPT_4_MINI: RateLimit(500, 60_000),
        }
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def get_openai_model_rate_limit(model: OpenAIModel) -> RateLimit:
    """
    Return the rate limit for a specific OpenAI model.

    Convenience wrapper around ``OpenAIModel.get_rate_limit`` so callers can
    use a plain function instead of the enum classmethod.

    Args:
        model (OpenAIModel): The OpenAI model to get the rate limit for.

    Returns:
        RateLimit: The rate limit information for the specified model.
    """
    limit = OpenAIModel.get_rate_limit(model)
    return limit
|
template/src/template/llm/openai/openai_rate_limiter.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
rate_limiter.py
|
| 3 |
+
|
| 4 |
+
This module provides a RateLimiter class for managing API rate limits based on both request count and token usage.
|
| 5 |
+
It's designed to work with OpenAI's API rate limit headers but can be adapted for other APIs with similar rate limiting
|
| 6 |
+
mechanisms.
|
| 7 |
+
|
| 8 |
+
The module includes:
|
| 9 |
+
1. A RateLimitInfo dataclass to store rate limit information.
|
| 10 |
+
2. A parse_time_string function to convert time strings to seconds using pytimeparse.
|
| 11 |
+
3. A RateLimiter class that manages and enforces rate limits.
|
| 12 |
+
|
| 13 |
+
Dependencies:
|
| 14 |
+
pytimeparse: Install with `pip install pytimeparse`
|
| 15 |
+
|
| 16 |
+
Usage:
|
| 17 |
+
rate_limiter = RateLimiter(initial_rate_requests=60, initial_rate_tokens=150000)
|
| 18 |
+
await rate_limiter.acquire(estimated_tokens)
|
| 19 |
+
# Make API call
|
| 20 |
+
rate_limiter.update_rate_limit_info(response_headers)
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import asyncio
|
| 24 |
+
import math
|
| 25 |
+
import re
|
| 26 |
+
import time
|
| 27 |
+
from dataclasses import dataclass
|
| 28 |
+
|
| 29 |
+
from httpx import Headers
|
| 30 |
+
from pytimeparse import parse
|
| 31 |
+
|
| 32 |
+
from template.shared import logger_factory
|
| 33 |
+
|
| 34 |
+
logger = logger_factory.get_logger(__name__)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class OpenAIRateLimitInfo:
    """
    Snapshot of rate-limit state as reported by OpenAI response headers.

    Attributes:
        limit_requests (int): Maximum number of requests allowed.
        limit_tokens (int): Maximum number of tokens allowed.
        remaining_requests (int): Number of requests remaining in the window.
        remaining_tokens (int): Number of tokens remaining in the window.
        reset_requests_seconds (float): Seconds until the request limit resets.
        reset_tokens_seconds (float): Seconds until the token limit resets.
    """

    limit_requests: int
    limit_tokens: int
    remaining_requests: int
    remaining_tokens: int
    reset_requests_seconds: float
    reset_tokens_seconds: float
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def parse_time_string(time_str: str) -> float:
    """
    Parse a time string to seconds, handling various formats including milliseconds.

    This function handles time strings in various formats, including those
    returned in OpenAI's rate limit headers (e.g., '12h38m27.913s', '6m0s', '932ms').

    Args:
        time_str (str): A string representing a duration.

    Returns:
        float: The total number of seconds represented by the time string.

    Raises:
        ValueError: If the time string cannot be parsed.

    Examples::

        >>> parse_time_string('1h30m')
        5400.0
        >>> parse_time_string('45.5s')
        45.5
        >>> parse_time_string('1h23m45.6s')
        5025.6
        >>> parse_time_string('6m0s')
        360.0
        >>> parse_time_string('932ms')
        0.932
        >>> parse_time_string('12.5ms')
        0.0125
    """
    # Millisecond values are not understood by pytimeparse, so handle them
    # here. The pattern now also accepts fractional milliseconds ('12.5ms'),
    # which the previous integer-only regex rejected and which then failed
    # in pytimeparse with a ValueError.
    ms_match = re.match(r"^(\d+(?:\.\d+)?)ms$", time_str)
    if ms_match:
        return float(ms_match.group(1)) / 1000

    # Use pytimeparse for other formats
    seconds = parse(time_str)
    if seconds is None:
        raise ValueError(f"Could not parse time string: {time_str}")
    return float(seconds)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
class OpenAIRateLimiter:
    """
    A class to manage and enforce rate limits for OpenAI API calls.

    This class handles both request-based and token-based rate limiting.
    It can adapt to changing rate limits based on information provided in API
    response headers (see ``update_rate_limit_info``); until the first header
    update arrives it falls back to a local token-bucket estimate.
    """

    def __init__(self, initial_rate_requests: int, initial_rate_tokens: int, per: float = 60.0):
        """
        Initialize the OpenAIRateLimiter.

        Args:
            initial_rate_requests (int): Initial number of requests allowed per minute.
            initial_rate_tokens (int): Initial number of tokens allowed per minute.
            per (float): The time period in seconds for which the rate applies. Defaults to 60.0.
        """
        # Configured ceilings; used both as refill rates and as caps.
        self.rate_requests = initial_rate_requests
        self.rate_tokens = initial_rate_tokens
        self.per = per
        # Current bucket balances for requests and tokens.
        self.allowance_requests = initial_rate_requests
        self.allowance_tokens = initial_rate_tokens
        # Timestamp of the previous acquire(); drives the local refill math.
        self.last_check = time.time()
        # Latest server-reported limits, set by update_rate_limit_info().
        self.rate_limit_info: OpenAIRateLimitInfo | None = None

    async def acquire(self, tokens_to_use: int) -> None:
        """
        Acquire permission to make an API call, respecting rate limits.

        This method checks both request and token limits. If either limit is
        exceeded, it pauses execution for (roughly) the reported reset time.

        Args:
            tokens_to_use (int): The estimated number of tokens that will be used in this API call.

        Raises:
            asyncio.TimeoutError: If the sleep time exceeds the maximum allowed wait time.
        """
        now = time.time()
        time_passed = now - self.last_check
        self.last_check = now

        if self.rate_limit_info:
            # Trust the server-reported remaining quota over local bookkeeping.
            # NOTE(review): rate_limit_info is only refreshed by
            # update_rate_limit_info(); between API responses several
            # acquire() calls may re-read the SAME stale remaining values,
            # overwriting the local decrements below — confirm this is
            # acceptable for the expected concurrency level.
            self.allowance_requests = self.rate_limit_info.remaining_requests
            self.allowance_tokens = self.rate_limit_info.remaining_tokens
            reset_time_requests_seconds = self.rate_limit_info.reset_requests_seconds
            reset_time_tokens_seconds = self.rate_limit_info.reset_tokens_seconds
        else:
            # Local token-bucket refill proportional to elapsed time, capped
            # at the configured per-minute rates.
            self.allowance_requests += int(time_passed * (self.rate_requests / self.per))
            self.allowance_tokens += int(time_passed * (self.rate_tokens / self.per))
            self.allowance_requests = min(self.allowance_requests, self.rate_requests)
            self.allowance_tokens = min(self.allowance_tokens, self.rate_tokens)
            # Default reset time if we don't have rate limit info
            reset_time_requests_seconds = reset_time_tokens_seconds = 30

        wait_time = 0
        if self.allowance_requests < 1:
            wait_time = int(max(wait_time, math.ceil(reset_time_requests_seconds)))
        if self.allowance_tokens < tokens_to_use:
            # If token reset time is more than a minute, it's likely the daily limit
            # In this case, we'll wait for the request reset time instead
            wait_time = int(max(wait_time, min(math.ceil(reset_time_tokens_seconds), 60)))

        if wait_time > 0:
            logger.info("Rate limit exceeded", sleep_time=wait_time)
            await asyncio.sleep(wait_time)
            # After waiting, optimistically assume a full window again.
            self.allowance_requests = self.rate_requests
            self.allowance_tokens = self.rate_tokens
        else:
            # Consume one request and the caller's estimated token budget.
            self.allowance_requests -= 1
            self.allowance_tokens -= tokens_to_use

    def update_rate_limit_info(self, headers: Headers) -> None:
        """
        Update the rate limit information based on API response headers.

        This method should be called after each successful API call to keep
        the rate limit information up-to-date.

        Args:
            headers (Headers): The response headers from the API call.

        Note:
            This method expects headers to include 'x-ratelimit-*' keys as provided by the OpenAI API.
            If these headers are not present, it falls back to the initial or current values.
        """
        self.rate_limit_info = OpenAIRateLimitInfo(
            limit_requests=int(headers.get("x-ratelimit-limit-requests", self.rate_requests)),
            limit_tokens=int(headers.get("x-ratelimit-limit-tokens", self.rate_tokens)),
            remaining_requests=int(headers.get("x-ratelimit-remaining-requests", self.allowance_requests)),
            remaining_tokens=int(headers.get("x-ratelimit-remaining-tokens", self.allowance_tokens)),
            # Reset headers arrive as duration strings like '6m0s' or '932ms'.
            reset_requests_seconds=parse_time_string(headers.get("x-ratelimit-reset-requests", "30s")),
            reset_tokens_seconds=parse_time_string(headers.get("x-ratelimit-reset-tokens", "30s")),
        )
|
template/src/template/llm/openrouter/__init__.py
ADDED
|
File without changes
|
template/src/template/llm/prompt.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Callable
|
| 2 |
+
|
| 3 |
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
|
| 4 |
+
|
| 5 |
+
from template.llm.llm_service import LLMService
|
| 6 |
+
from template.llm.prompt_text import PromptText
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class PromptEvaluationError(Exception):
    """
    Raised when evaluating a prompt fails.

    Signals an error during prompt evaluation — either the LLM service
    produced no result or the output could not be formatted.
    """

    pass
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class Prompt:
    """
    A class to manage different types of prompts for language models.

    This class handles system prompts, user prompts, partial assistant prompts,
    and output formatting for language model interactions.
    """

    def __init__(
        self,
        llm_service: LLMService,
        system_prompt: PromptText | None = None,
        user_prompt: PromptText | None = None,
        partial_assistant_prompt: PromptText | None = None,
        max_tokens: int = 1000,
        temperature: float = 0.0,
        output_formatter: Callable[[str], Any] = lambda x: x,
    ):
        """
        Initialize a Prompt object.

        Args:
            llm_service (LLMService): The language model service to use for evaluation.
            system_prompt (PromptText | None): The system prompt.
            user_prompt (PromptText | None): The user prompt.
            partial_assistant_prompt (PromptText | None): The partial assistant prompt.
            max_tokens (int): The maximum number of tokens to generate.
            temperature (float): The sampling temperature.
            output_formatter (Callable[[str], Any]): A function to format the output.
                The default is the identity function (raw text is returned as-is).

        Raises:
            ValueError: If both system_prompt and user_prompt are None.
        """
        self._system_prompt = system_prompt
        self._user_prompt = user_prompt
        self._partial_assistant_prompt = partial_assistant_prompt
        self._max_tokens = max_tokens
        self._temperature = temperature
        self._output_formatter: Callable[[str], Any] = output_formatter
        self._llm_service = llm_service

        if not self._system_prompt and not self._user_prompt:
            raise ValueError("At least one of system_prompt or user_prompt must be provided")

    @property
    def llm_service(self) -> LLMService:
        """The language model service used for evaluation."""
        return self._llm_service

    @llm_service.setter
    def llm_service(self, value: LLMService) -> None:
        """
        Set the language model service for the prompt to something other than the constructor value.

        Args:
            value (LLMService): The language model service to use for evaluation.
        """
        self._llm_service = value

    # Retry on PromptEvaluationError only: formatting failures are often due
    # to LLM output randomness, so re-evaluating the prompt may succeed.
    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(3),
        retry=retry_if_exception_type(PromptEvaluationError),
        reraise=True,
    )
    async def evaluate(self) -> Any:
        """
        Evaluate the prompt using the LLM service.

        Returns:
            Any: The formatted output of the LLM service, or None if no result.

        Raises:
            PromptEvaluationError: If an error occurs during prompt evaluation
                (no result from the service, or the formatter raised).
        """
        result = await self._llm_service.invoke(
            user_prompt=self._user_prompt.get_prompt() if self._user_prompt else None,
            system_prompt=self._system_prompt.get_prompt() if self._system_prompt else None,
            partial_assistant_prompt=(
                self._partial_assistant_prompt.get_prompt() if self._partial_assistant_prompt else None
            ),
            max_tokens=self._max_tokens,
            temperature=self._temperature,
        )
        if result is None:
            raise PromptEvaluationError("No result from LLM service")
        try:
            return self._output_formatter(result)
        except Exception as e:
            # This might happen because of just randomness in the LLM output
            # Let's throw a special exception that triggers a retry of the prompt
            raise PromptEvaluationError("Error formatting output") from e

    def upsert_inputs(self, new_inputs: dict[str, Any]) -> None:
        """
        Update the prompts with new inputs.

        This method updates all non-None prompts (system, user, and partial assistant)
        with the provided new inputs.

        Args:
            new_inputs (dict[str, Any]): A dictionary of new input values to update the prompts.
        """
        if self._system_prompt:
            self._system_prompt.upsert_inputs(new_inputs)
        if self._user_prompt:
            self._user_prompt.upsert_inputs(new_inputs)
        if self._partial_assistant_prompt:
            self._partial_assistant_prompt.upsert_inputs(new_inputs)
|
template/src/template/llm/prompt_chain.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Union
|
| 2 |
+
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
|
| 5 |
+
from template.llm.prompt import Prompt
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class PromptChain:
    """
    A class to manage and execute a chain of Prompt objects.

    This class allows for the creation of a sequence of prompts where the output
    of each prompt is used as input for the subsequent prompt in the chain.

    Attributes:
        prompts (list[Prompt]): An ordered list of Prompt objects to be executed in sequence.
    """

    def __init__(self, prompts: list[Prompt]):
        """
        Initialize a PromptChain object.

        Args:
            prompts (list[Prompt]): An ordered list of Prompt objects to be chained together.
        """
        self._prompts = prompts

    @property
    def prompts(self) -> list[Prompt]:
        """
        All prompts in the chain. Potentially useful for changing the prompts dynamically.

        Returns:
            list[Prompt]: A list of all Prompt objects in the chain.
        """
        return self._prompts

    async def evaluate(self) -> Union[Any, dict[str, Any], BaseModel]:
        """
        Evaluate the entire chain of prompts.

        Each prompt is executed sequentially, with the output of each prompt
        fed as input to the next. The result of the last prompt is returned.

        Returns:
            Union[Any, dict[str, Any], BaseModel]: The output of the last prompt in the
                chain (type depends on its output_formatter), or None if the chain is empty.

        Raises:
            Any exception that might be raised by the individual Prompt.evaluate() calls.

        Note:
            - A dict output updates the input for the next prompt directly.
            - A Pydantic model output is converted via model_dump() and merged in.
            - Any other output is stored under the key 'previous_output'.
        """
        current_input: dict[str, Any] = {}
        # Initialize so an empty chain returns None instead of raising
        # NameError on the final `return result`.
        result: Any = None
        for prompt in self._prompts:
            # Update the prompts with the current input
            prompt.upsert_inputs(current_input)
            # Evaluate the prompt
            result = await prompt.evaluate()
            # Prepare the input for the next prompt
            if isinstance(result, dict):
                current_input.update(result)
            elif isinstance(result, BaseModel):
                current_input.update(result.model_dump())
            else:
                current_input["previous_output"] = result
        return result

    @classmethod
    def create(cls, *args: dict[str, Any] | Prompt) -> "PromptChain":
        """
        Create a PromptChain from a series of prompt configurations or Prompt objects.

        Args:
            *args: Variable number of arguments, each being either:
                - A dictionary of keyword arguments for a single Prompt.
                - A Prompt object used as-is.

        Returns:
            PromptChain: A new PromptChain instance with the specified prompts.

        Raises:
            ValueError: If an argument is neither a dict nor a Prompt object.

        Example::

            chain = PromptChain.create(
                {
                    "llm_service": llm_service1,
                    "system_prompt": PromptText("You are a helpful assistant."),
                    "user_prompt": PromptText("Summarize this: {text}", inputs={"text": "A long text."}),
                    "output_formatter": lambda x: {"summary": x}
                },
                # You can also use Prompt() objects directly
                Prompt(llm_service2, user_prompt=PromptText("Translate to French: {summary}")),
                {
                    "llm_service": llm_service3,
                    "user_prompt": PromptText("Make it formal: {previous_output}")
                }
            )
        """
        prompts: list[Prompt] = []
        for arg in args:
            if isinstance(arg, dict):
                prompts.append(Prompt(**arg))
            elif isinstance(arg, Prompt):
                prompts.append(arg)
            else:
                raise ValueError(f"Invalid argument type: {type(arg)}. Expected dict or Prompt.")
        return cls(prompts)
|
template/src/template/llm/prompt_template.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import dataclass
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
@dataclass
|
| 5 |
+
class PromptTemplate:
|
| 6 |
+
"""
|
| 7 |
+
A dataclass representing the content of a prompt template.
|
| 8 |
+
|
| 9 |
+
This class holds the raw text content for different components of a prompt,
|
| 10 |
+
such as system instructions, user input, and partial assistant responses.
|
| 11 |
+
|
| 12 |
+
Attributes:
|
| 13 |
+
name (str): The name of the prompt template.
|
| 14 |
+
system_prompt (str | None): The content of the system prompt, if any.
|
| 15 |
+
user_prompt (str | None): The content of the user prompt, if any.
|
| 16 |
+
partial_assistant_prompt (str | None): The content of a partial assistant prompt, if any.
|
| 17 |
+
|
| 18 |
+
At least one of system_prompt or user_prompt should be non-None for a valid template.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
name: str
|
| 22 |
+
system_prompt: str | None = None
|
| 23 |
+
user_prompt: str | None = None
|
| 24 |
+
partial_assistant_prompt: str | None = None
|
template/src/template/llm/prompt_text.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class _DefaultDict(dict[str, Any]):
|
| 5 |
+
def __missing__(self, key: str) -> str:
|
| 6 |
+
return "{" + key + "}"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class PromptText:
    """
    A class for managing and formatting prompt templates with dynamic inputs.

    The template may contain {placeholder} fields that are filled from the
    stored inputs; placeholders with no matching input are left intact so
    they can be supplied later via upsert_inputs.
    """

    def __init__(self, prompt_template: str, inputs: dict[str, Any] | None = None) -> None:
        """
        Initialize a PromptText object.

        Args:
            prompt_template (str): The template string for the prompt.
            inputs (dict[str, Any] | None): Initial input values to substitute into
                the template. If None, an empty dictionary will be used.

        Raises:
            ValueError: If the template is invalid (e.g., unmatched braces).
        """
        self.prompt_template = prompt_template
        # Copy the caller's dict: the original aliased it, so later
        # upsert_inputs calls silently mutated caller-owned state.
        self.inputs = dict(inputs) if inputs else {}
        self._prompt = ""
        self._update_prompt()

    def _update_prompt(self) -> None:
        """
        Re-render the prompt by formatting the template with the current inputs.

        Called internally whenever the inputs are updated.

        Raises:
            ValueError: If the template is invalid (e.g., unmatched braces).
        """
        default_inputs = _DefaultDict(self.inputs)
        try:
            self._prompt = self.prompt_template.format_map(default_inputs)
        except ValueError as e:
            # Chain the original error so the underlying format failure is preserved.
            raise ValueError(f"Invalid prompt template: {e}") from e

    def upsert_inputs(self, new_inputs: dict[str, Any]) -> None:
        """
        Merge new input values and regenerate the prompt.

        Args:
            new_inputs (dict[str, Any]): New input values to add or overwrite.

        Raises:
            ValueError: If the template becomes invalid after updating inputs.
        """
        self.inputs.update(new_inputs)
        self._update_prompt()

    def get_prompt(self) -> str:
        """
        Retrieve the current formatted prompt.

        Returns:
            str: The current prompt string after formatting with inputs.
        """
        return self._prompt
|
template/src/template/shared/__init__.py
ADDED
|
File without changes
|
template/src/template/shared/aws_clients.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Any, cast
|
| 3 |
+
|
| 4 |
+
import boto3
|
| 5 |
+
import requests
|
| 6 |
+
from boto3.resources.base import ServiceResource
|
| 7 |
+
from botocore.client import BaseClient
|
| 8 |
+
from botocore.exceptions import ClientError
|
| 9 |
+
from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
|
| 10 |
+
from mypy_boto3_dynamodb.service_resource import DynamoDBServiceResource
|
| 11 |
+
from mypy_boto3_s3 import Client as S3Client
|
| 12 |
+
from mypy_boto3_ssm import Client as SSMClient
|
| 13 |
+
|
| 14 |
+
from template.shared import config, logger_factory
|
| 15 |
+
|
| 16 |
+
logger = logger_factory.get_logger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _get_session() -> boto3.Session:
    """
    Build a boto3 session appropriate for the execution environment.

    Inside ECS (signalled by the AWS_CONTAINER_CREDENTIALS_RELATIVE_URI
    environment variable) a default session is returned so the ECS task's
    IAM role applies; locally, the "Geometric-PowerUserAccess" profile is used.

    Returns:
        boto3.Session: A session configured for the current environment.
    """
    running_in_ecs = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ
    if running_in_ecs:
        return boto3.Session()
    return boto3.Session(profile_name="Geometric-PowerUserAccess")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class ECSCredentialsError(Exception):
    """Raised when ECS task credentials cannot be retrieved from the container metadata endpoint."""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class RoleAssumptionError(Exception):
    """Raised when assuming an IAM role via STS fails."""
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def get_credentials() -> dict[str, str]:
    """
    Retrieve AWS credentials for the current execution environment.

    In ECS the credentials come from the task metadata endpoint; locally the
    role named in the configuration is assumed via STS.

    Returns:
        dict[str, str]: AccessKeyId, SecretAccessKey, and SessionToken.

    Raises:
        ECSCredentialsError: If the ECS metadata endpoint cannot supply credentials.
        RoleAssumptionError: If assuming the configured IAM role fails.
    """
    session = _get_session()
    running_in_ecs = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" in os.environ
    if running_in_ecs:
        logger.info("Using ECS task's IAM role for credentials")
        return _get_ecs_credentials()
    logger.info("Assuming role", role_arn=config.get_role_arn())
    return _assume_role(session)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _get_ecs_credentials() -> dict[str, str]:
    # The ECS agent injects the relative URI via the environment; 169.254.170.2
    # is the fixed credentials endpoint exposed to ECS tasks.
    relative_uri = os.environ.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
    response = requests.get(f"http://169.254.170.2{relative_uri}", timeout=5)
    if response.status_code != 200:
        raise ECSCredentialsError(f"Failed to retrieve ECS task credentials. Status code: {response.status_code}")
    creds = response.json()
    return {
        "AccessKeyId": creds["AccessKeyId"],
        "SecretAccessKey": creds["SecretAccessKey"],
        "SessionToken": creds["Token"],
    }
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _assume_role(session: boto3.Session) -> dict[str, str]:
    # Exchange the local profile's credentials for temporary credentials of
    # the role named in the configuration.
    try:
        response = session.client("sts").assume_role(
            RoleArn=config.get_role_arn(), RoleSessionName="AssumeRoleSession"
        )
        creds = response["Credentials"]
        logger.info("Role assumed successfully")
        return {
            "AccessKeyId": creds["AccessKeyId"],
            "SecretAccessKey": creds["SecretAccessKey"],
            "SessionToken": creds["SessionToken"],
        }
    except ClientError as e:
        raise RoleAssumptionError(f"Failed to assume role: {e}") from e
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _get_boto3_client(service_name: str) -> BaseClient:
    """
    Create a boto3 client for the given AWS service.

    Uses the session and credentials appropriate for the current execution
    environment (ECS task role or locally assumed role).

    Args:
        service_name (str): The name of the AWS service for which to create a client.

    Returns:
        BaseClient: A boto3 client for the specified service.
    """
    logger.info("Creating boto3 client", service=service_name)
    session = _get_session()
    credentials = get_credentials()
    client_kwargs: dict[str, Any] = {"region_name": config.get_aws_region(), "use_ssl": True}
    if credentials:
        client_kwargs["aws_access_key_id"] = credentials["AccessKeyId"]
        client_kwargs["aws_secret_access_key"] = credentials["SecretAccessKey"]
        client_kwargs["aws_session_token"] = credentials["SessionToken"]
    return session.client(service_name, **client_kwargs)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def _get_boto3_resource(service_name: str) -> ServiceResource:
    """
    Create a boto3 resource for the given AWS service.

    Uses the session and credentials appropriate for the current execution
    environment (ECS task role or locally assumed role).

    Args:
        service_name (str): The name of the AWS service for which to create a resource.

    Returns:
        ServiceResource: A boto3 resource for the specified service.
    """
    logger.info("Creating boto3 resource", service=service_name)
    session = _get_session()
    credentials = get_credentials()
    resource_kwargs: dict[str, Any] = {"region_name": config.get_aws_region(), "use_ssl": True}
    if credentials:
        resource_kwargs["aws_access_key_id"] = credentials["AccessKeyId"]
        resource_kwargs["aws_secret_access_key"] = credentials["SecretAccessKey"]
        resource_kwargs["aws_session_token"] = credentials["SessionToken"]
    return session.resource(service_name, **resource_kwargs)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def get_ssm_client() -> SSMClient:
    """
    Build an AWS Systems Manager (SSM) client.

    Useful for Parameter Store reads and other SSM operations.

    Returns:
        SSMClient: An SSM client configured for the current execution environment.
    """
    client = _get_boto3_client("ssm")
    return cast(SSMClient, client)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def get_s3_client() -> S3Client:
    """
    Build an AWS S3 client for interacting with buckets and objects.

    Returns:
        S3Client: An S3 client configured for the current execution environment.
    """
    client = _get_boto3_client("s3")
    return cast(S3Client, client)
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def get_dynamodb_resource() -> DynamoDBServiceResource:
    """
    Build an AWS DynamoDB resource for interacting with tables and items.

    Returns:
        DynamoDBServiceResource: A DynamoDB resource configured for the current execution environment.
    """
    resource = _get_boto3_resource("dynamodb")
    return cast(DynamoDBServiceResource, resource)
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def _assume_intermediate_role(role_arn: str, session_name: str) -> dict[str, str]:
    """
    Assume an IAM role and return its temporary credentials.

    Args:
        role_arn (str): The ARN of the role to assume.
        session_name (str): An identifier for the assumed role session.

    Returns:
        dict[str, str]: AccessKeyId, SecretAccessKey, and SessionToken.

    Raises:
        ClientError: If the STS assume-role call fails.
    """
    logger.info("Attempting to assume role", role_arn=role_arn)
    try:
        creds = _get_boto3_client("sts").assume_role(RoleArn=role_arn, RoleSessionName=session_name)["Credentials"]
    except ClientError as e:
        logger.error("Error assuming role", error=str(e))
        raise
    logger.info("Role assumed successfully")
    return {
        "AccessKeyId": creds["AccessKeyId"],
        "SecretAccessKey": creds["SecretAccessKey"],
        "SessionToken": creds["SessionToken"],
    }
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def get_bedrock_client() -> BedrockRuntimeClient:
    """
    Build a Bedrock runtime client using the cross-account BedrockAccess role.

    Returns:
        BedrockRuntimeClient: A Bedrock client authenticated with the assumed role session.
    """
    bedrock_role_arn = f"arn:aws:iam::{config.get_bedrock_account()}:role/BedrockAccess"
    creds = _assume_intermediate_role(bedrock_role_arn, "BedrockAssumeRoleSession")
    client = boto3.client(
        "bedrock-runtime",
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
        region_name=config.get_aws_region(),
    )
    return cast(BedrockRuntimeClient, client)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
class ParameterNotFoundError(Exception):
    """Raised when a requested parameter does not exist in the Parameter Store."""
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class ParameterStoreAccessError(Exception):
    """Raised when the Parameter Store cannot be accessed."""
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def fetch_from_parameter_store(parameter_name: str, is_secret: bool = False) -> str:
    """
    Fetches the value of a parameter from AWS Systems Manager Parameter Store.

    Note: the previous docstring claimed 'is_secret' had no effect and that all
    parameters were retrieved with decryption; the code actually forwards it as
    WithDecryption, so it is documented accordingly here.

    Args:
        parameter_name (str): The name of the parameter to fetch.
        is_secret (bool): When True the value is decrypted on retrieval
            (required for SecureString parameters). Defaults to False.

    Returns:
        str: The value of the parameter, or an empty string if no value is present.

    Raises:
        ParameterNotFoundError: If the parameter is not found.
        ParameterStoreAccessError: If there's an error accessing the Parameter Store.
    """
    logger.info("Fetching parameter from Parameter Store", parameter=parameter_name)
    ssm_client = get_ssm_client()
    try:
        response = ssm_client.get_parameter(Name=parameter_name, WithDecryption=is_secret)
    except ssm_client.exceptions.ParameterNotFound as e:
        # Chain the SSM exception so the original failure context is preserved.
        raise ParameterNotFoundError(f"Parameter '{parameter_name}' not found") from e
    except ClientError as e:
        raise ParameterStoreAccessError(f"Error accessing Parameter Store: {str(e)}") from e

    logger.info("Successfully fetched parameter", parameter=parameter_name)
    return str(response["Parameter"].get("Value", ""))
|
template/src/template/shared/config.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from functools import cache
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import tomli
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _get_src_directory() -> str:
    """
    Return the absolute path of the nearest ancestor directory named 'src'.

    Walks up the directory tree from this file's location.

    Returns:
        str: The absolute path to the src directory.

    Raises:
        FileNotFoundError: If no ancestor directory is named 'src'.
    """
    ancestors = Path(__file__).resolve().parents
    src_parent = next((p for p in ancestors if p.name == "src"), None)
    if src_parent is None:
        raise FileNotFoundError("Could not find 'src' directory in the path.")
    return str(src_parent)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _get_config() -> dict[str, Any]:
    """
    Read and parse the config.toml file.

    The file is located at src/template/shared/config.toml relative to the
    src directory, so this works regardless of the caller's location.

    Returns:
        dict[str, Any]: The parsed contents of the config.toml file.

    Raises:
        FileNotFoundError: If the config.toml file cannot be found.
    """
    config_path = os.path.join(_get_src_directory(), "template", "shared", "config.toml")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config file not found at {config_path}")
    with open(config_path, "rb") as config_file:
        return tomli.load(config_file)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@cache
def get_role_arn() -> str:
    """
    Build the ARN of the GitHub role from the TOML configuration.
    """
    role_name = _get_config()["aws"]["role_name"]
    return f"arn:aws:iam::{get_aws_account()}:role/{role_name}"
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@cache
def get_aws_account() -> str:
    """
    Return the AWS account id from the TOML configuration.
    """
    return str(_get_config()["aws"]["aws_account"])
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@cache
def get_bedrock_account() -> str:
    """
    Return the Bedrock AWS account id from the TOML configuration.
    """
    return str(_get_config()["aws"]["bedrock_aws_account"])
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def get_aws_region() -> str:
    """
    Return the AWS region. Currently hard-coded to us-east-1.
    """
    return "us-east-1"
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@cache
def get_openai_api_key_path() -> str:
    """
    Return the AWS Parameter Store path holding the OpenAI API key,
    as configured in the TOML file.
    """
    return str(_get_config()["openai"]["openai_api_key_parameter_store_path"])
|
template/src/template/shared/config.toml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[aws]
|
| 2 |
+
role_name = "ReadWrite"
|
| 3 |
+
# Sandbox AWS account, just leave this as-is
|
| 4 |
+
aws_account = "339713096219"
|
| 5 |
+
shared_services_aws_account = "851725506657"
|
| 6 |
+
bedrock_aws_account = "339713101814"
|
| 7 |
+
|
| 8 |
+
[openai]
|
| 9 |
+
openai_api_key_parameter_store_path = "/secrets/openai/api_key"
|
template/src/template/shared/logger_factory.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
import structlog
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def setup_logging(log_level: str = "INFO", log_file: str | None = None) -> None:
    """
    Set up structured logging for the application.

    Args:
        log_level (str): The logging level. Defaults to "INFO".
        log_file (str | None): The path to the log file. If None, logs to stdout.
    """
    processors = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.dict_tracebacks,
    ]

    handler: logging.Handler
    if log_file:
        # JSON lines when writing to a file; human-readable console otherwise.
        handler = logging.FileHandler(log_file)
        processors.append(structlog.processors.JSONRenderer())
    else:
        handler = logging.StreamHandler(sys.stdout)
        processors.append(structlog.dev.ConsoleRenderer())

    # Register the handler through basicConfig. The previous version called
    # basicConfig() with no handlers (which installs a default stderr
    # StreamHandler) and then added our handler on top, so every record was
    # emitted twice.
    logging.basicConfig(level=log_level, handlers=[handler])

    structlog.configure(
        processors=processors,  # type: ignore
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_logger(name: str) -> structlog.stdlib.BoundLogger:
    """
    Return a named structlog logger.

    Args:
        name (str): The name of the logger.

    Returns:
        structlog.stdlib.BoundLogger: The logger instance.
    """
    logger: structlog.stdlib.BoundLogger = structlog.get_logger(name)  # type: ignore
    return logger
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def bind_extra(logger: structlog.stdlib.BoundLogger, **kwargs: Any) -> structlog.stdlib.BoundLogger:
    """
    Return a logger with extra key-value context bound to it.

    Args:
        logger (structlog.stdlib.BoundLogger): The logger instance.
        **kwargs: Key-value pairs to bind to the logger.

    Returns:
        structlog.stdlib.BoundLogger: The logger with bound extra information.
    """
    bound = logger.bind(**kwargs)
    return bound
|
template/tests/__init__.py
ADDED
|
File without changes
|
template/tests/app/__init__.py
ADDED
|
File without changes
|
template/tests/app/prompts/__init__.py
ADDED
|
File without changes
|
template/tests/app/prompts/test_prompt_loader.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from template.app.prompts.prompt_loader import PromptLoader
|
| 6 |
+
from template.llm.openai.openai import AsyncOpenAIService
|
| 7 |
+
from template.llm.openai.openai_model import OpenAIModel
|
| 8 |
+
from template.llm.prompt import Prompt
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_test_prompts_path():
    """Return the directory holding the fixture prompt files for these tests."""
    here = Path(__file__).parent
    return here / "test_prompts"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@pytest.fixture
def prompt_loader():
    """Provide a PromptLoader rooted at the test prompt fixtures directory."""
    loader = PromptLoader(get_test_prompts_path())
    return loader
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_load_template_success(prompt_loader):
    """A template with both system and human files populates all fields."""
    loaded = prompt_loader.load_template("basic_test/1 - test")

    assert loaded.name == "1 - test"
    assert loaded.partial_assistant_prompt is None
    assert loaded.system_prompt.strip() == "This is a system prompt."
    assert loaded.user_prompt.strip() == "This is a user prompt."
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def test_load_template_multiple_prompts(prompt_loader):
    """A human-only template loads with no system or partial-assistant prompt."""
    loaded = prompt_loader.load_template("basic_test/2 - test2")

    assert loaded.name == "2 - test2"
    assert loaded.partial_assistant_prompt is None
    assert loaded.system_prompt is None
    assert loaded.user_prompt.strip() == "This is another user prompt."
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_load_template_nonexistent(prompt_loader):
    """Requesting a template with no prompt files raises FileNotFoundError."""
    missing = "nonexistent/prompt"
    with pytest.raises(FileNotFoundError):
        prompt_loader.load_template(missing)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def test_create_prompt(prompt_loader):
    """create_prompt wires the template, LLM service and options into a Prompt."""
    loaded = prompt_loader.load_template("basic_test/1 - test")
    service = AsyncOpenAIService(OpenAIModel.GPT_4_MINI)
    built = prompt_loader.create_prompt(
        loaded,
        llm_service=service,
        max_tokens=500,
        temperature=0.7,
        output_formatter=lambda x: {"result": x},
        variable="test_value",
    )

    assert isinstance(built, Prompt)
    assert built._llm_service == service
    assert built._max_tokens == 500
    assert built._temperature == 0.7
    assert callable(built._output_formatter)
    assert built._system_prompt.get_prompt() == "This is a system prompt."
    assert built._user_prompt.get_prompt() == "This is a user prompt."
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def test_create_prompt_without_system_prompt(prompt_loader):
    """A template lacking a system file yields a Prompt with no system prompt."""
    loaded = prompt_loader.load_template("basic_test/2 - test2")
    service = AsyncOpenAIService(OpenAIModel.GPT_4_MINI)

    built = prompt_loader.create_prompt(loaded, llm_service=service)

    assert isinstance(built, Prompt)
    assert built._system_prompt is None
    assert built._user_prompt.get_prompt() == "This is another user prompt."
|
template/tests/app/prompts/test_prompts/basic_test/1 - test_human.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This is a user prompt.
|
template/tests/app/prompts/test_prompts/basic_test/1 - test_system.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This is a system prompt.
|
template/tests/app/prompts/test_prompts/basic_test/2 - test2_human.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
This is another user prompt.
|
template/tests/app/test_education_classifier.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from unittest.mock import AsyncMock, MagicMock
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from template.app.education_classifier import EducationClassification, EducationClassifier, SchoolType
|
| 6 |
+
from template.app.model.linkedin.linkedin_models import DateComponent, Education, LinkedinProfile
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@pytest.fixture
def mock_llm_service():
    """Async-mocked LLM service; no real model calls are made."""
    service = AsyncMock()
    return service
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@pytest.fixture
def mock_prompt_loader():
    """Prompt-loader stub whose create_prompt yields an awaitable mock prompt."""
    stub = MagicMock()
    stub.load_template.return_value = MagicMock()
    stub.create_prompt.return_value = AsyncMock()
    return stub
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@pytest.fixture
def education_classifier(mock_llm_service, mock_prompt_loader):
    """EducationClassifier under test, built entirely from mocked collaborators."""
    classifier = EducationClassifier(llm_service=mock_llm_service, prompt_loader=mock_prompt_loader)
    return classifier
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@pytest.fixture
def sample_linkedin_profile():
    """A minimal profile carrying one MBA-style education entry."""
    mba = Education(
        school_name="Stanford University",
        degree="Master of Business Administration",
        field_of_study="Business Administration",
        start=DateComponent(year=2018),
        end=DateComponent(year=2020),
    )
    return LinkedinProfile(first_name="John", last_name="Doe", educations=[mba])
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@pytest.mark.asyncio
async def test_classify_education(education_classifier, sample_linkedin_profile, mock_prompt_loader):
    """The classifier passes through the classification produced by the prompt."""
    stub_prompt = mock_prompt_loader.create_prompt.return_value
    stub_prompt.evaluate.return_value = EducationClassification(
        output=SchoolType.MBA,
        confidence=0.95,
        reasoning="This is a Master of Business Administration degree from Stanford University.",
    )

    education = sample_linkedin_profile.educations[0]
    classification = await education_classifier.classify_education(sample_linkedin_profile, education)

    assert isinstance(classification, EducationClassification)
    assert classification.output == SchoolType.MBA
    assert classification.confidence == 0.95
    assert "Master of Business Administration" in classification.reasoning
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@pytest.mark.parametrize(
    "output,expected",
    [
        ("PRIMARY_SECONDARY", SchoolType.PRIMARY_SECONDARY),
        ("UNDERGRAD_INCOMPLETE", SchoolType.UNDERGRAD_INCOMPLETE),
        ("UNDERGRAD_COMPLETED", SchoolType.UNDERGRAD_COMPLETED),
        ("MBA", SchoolType.MBA),
        ("LAW_SCHOOL", SchoolType.LAW_SCHOOL),
        ("GRAD_SCHOOL", SchoolType.GRAD_SCHOOL),
        ("PHD", SchoolType.PHD),
        ("OTHER", SchoolType.OTHER),
    ],
)
def test_parse_output(output, expected):
    """Every recognized school-type token maps to its SchoolType member."""
    raw = f"output: {output}\nconfidence: 0.9\nreasoning: Test reasoning"
    parsed = EducationClassifier._parse_output(raw)
    assert parsed.reasoning == "Test reasoning"
    assert parsed.confidence == 0.9
    assert parsed.output == expected
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def test_parse_output_invalid():
    """An unknown school-type token must raise ValueError."""
    bad = "output: INVALID\nconfidence: 0.9\nreasoning: Test reasoning"
    with pytest.raises(ValueError):
        EducationClassifier._parse_output(bad)
|
template/tests/app/test_work_experience_classifier.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from unittest.mock import AsyncMock, MagicMock
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from template.app.model.linkedin.linkedin_models import DateComponent, LinkedinProfile, Position
|
| 6 |
+
from template.app.work_experience_classifier import (
|
| 7 |
+
PrimaryJobType,
|
| 8 |
+
SecondaryJobType,
|
| 9 |
+
WorkExperienceClassification,
|
| 10 |
+
WorkExperienceClassifier,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@pytest.fixture
def mock_llm_service():
    """Async-mocked LLM service; no real model calls are made."""
    service = AsyncMock()
    return service
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@pytest.fixture
def mock_prompt_loader():
    """Prompt-loader stub whose create_prompt yields an awaitable mock prompt."""
    stub = MagicMock()
    stub.load_template.return_value = MagicMock()
    stub.create_prompt.return_value = AsyncMock()
    return stub
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@pytest.fixture
def work_experience_classifier(mock_llm_service, mock_prompt_loader):
    """WorkExperienceClassifier under test, built from mocked collaborators."""
    classifier = WorkExperienceClassifier(llm_service=mock_llm_service, prompt_loader=mock_prompt_loader)
    return classifier
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@pytest.fixture
def sample_linkedin_profile():
    """
    A minimal profile carrying one full-time engineering position.

    NOTE(review): the keyword here must be `positions` (plural). The original
    fixture passed `position=`, which does not match the
    `sample_linkedin_profile.positions[0]` access in
    test_classify_work_experience, nor the plural `educations=` used by the
    sibling education fixture — so the positions list would never be populated.
    """
    return LinkedinProfile(
        first_name="Jane",
        last_name="Smith",
        positions=[
            Position(
                title="Software Engineer",
                company_name="Tech Corp",
                start=DateComponent(year=2018, month=1),
                end=DateComponent(year=2020, month=12),
            )
        ],
    )
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@pytest.mark.asyncio
async def test_classify_work_experience(work_experience_classifier, sample_linkedin_profile, mock_prompt_loader):
    """The classifier passes through the classification produced by the prompt."""
    stub_prompt = mock_prompt_loader.create_prompt.return_value
    stub_prompt.evaluate.return_value = WorkExperienceClassification(
        primary_job_type=PrimaryJobType.FULL_TIME,
        secondary_job_type=SecondaryJobType.ENGINEERING,
        confidence=0.95,
        reasoning="This is a full-time software engineering position.",
    )

    position = sample_linkedin_profile.positions[0]
    classification = await work_experience_classifier.classify_work_experience(sample_linkedin_profile, position)

    assert isinstance(classification, WorkExperienceClassification)
    assert classification.primary_job_type == PrimaryJobType.FULL_TIME
    assert classification.secondary_job_type == SecondaryJobType.ENGINEERING
    assert classification.confidence == 0.95
    assert "full-time software engineering" in classification.reasoning
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@pytest.mark.parametrize(
    "primary,secondary,expected_primary,expected_secondary",
    [
        ("FULL_TIME", "INVESTING", PrimaryJobType.FULL_TIME, SecondaryJobType.INVESTING),
        (
            "ADVISORY_BOARD_INVESTOR",
            "BACK_OFFICE",
            PrimaryJobType.ADVISORY_BOARD_INVESTOR,
            SecondaryJobType.BACK_OFFICE,
        ),
        ("INTERNSHIP", "INVESTMENT_BANKING", PrimaryJobType.INTERNSHIP, SecondaryJobType.INVESTMENT_BANKING),
        ("EXTRACURRICULAR", "CONSULTING", PrimaryJobType.EXTRACURRICULAR, SecondaryJobType.CONSULTING),
        ("EDUCATION", "ENGINEERING", PrimaryJobType.EDUCATION, SecondaryJobType.ENGINEERING),
        ("OTHER", "ENTREPRENEUR_FOUNDER", PrimaryJobType.OTHER, SecondaryJobType.ENTREPRENEUR_FOUNDER),
    ],
)
def test_parse_output(primary, secondary, expected_primary, expected_secondary):
    """Each recognized primary/secondary token pair maps to its enum members."""
    raw = f"primary_job_type: {primary}\nsecondary_job_type: {secondary}\nconfidence: 0.9\nreasoning: Test reasoning"
    parsed = WorkExperienceClassifier._parse_output(raw)
    assert parsed.reasoning == "Test reasoning"
    assert parsed.confidence == 0.9
    assert parsed.primary_job_type == expected_primary
    assert parsed.secondary_job_type == expected_secondary
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_parse_output_invalid():
    """An unknown primary-job-type token must raise ValueError."""
    bad = "primary_job_type: INVALID\nsecondary_job_type: OTHER\nconfidence: 0.9\nreasoning: Test reasoning"
    with pytest.raises(ValueError):
        WorkExperienceClassifier._parse_output(bad)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def test_parse_output_invalid_confidence():
    """A non-numeric confidence value must raise ValueError."""
    bad = "primary_job_type: FULL_TIME\nsecondary_job_type: OTHER\nconfidence: invalid\nreasoning: Test reasoning"
    with pytest.raises(ValueError):
        WorkExperienceClassifier._parse_output(bad)
|