Spaces:
Runtime error
Runtime error
| # Poetry package metadata for the llm-engineering project. | |
| [tool.poetry] | |
| name = "llm-engineering" | |
| version = "0.1.0" | |
| # NOTE(review): the description is empty; consider adding a one-line summary. | |
| description = "" | |
| # NOTE(review): "[email protected]" looks like an e-mail obfuscation placeholder rather than a real address; confirm against the upstream file. | |
| authors = ["iusztinpaul <[email protected]>"] | |
| license = "MIT" | |
| readme = "README.md" | |
| # Runtime dependencies. The comment headings below group them by the part of the project that uses them. | |
| # zenml, poethepoet and torch are pinned to exact versions; most other entries use caret ranges. | |
| [tool.poetry.dependencies] | |
| python = "~3.11" | |
| zenml = { version = "0.74.0", extras = ["server"] } | |
| pymongo = "^4.6.2" | |
| click = "^8.0.1" | |
| loguru = "^0.7.2" | |
| rich = "^13.7.1" | |
| numpy = "^1.26.4" | |
| poethepoet = "0.29.0" | |
| datasets = "^3.0.1" | |
| torch = "2.2.2" | |
| # Digital data ETL | |
| selenium = "^4.21.0" | |
| webdriver-manager = "^4.0.1" | |
| beautifulsoup4 = "^4.12.3" | |
| html2text = "^2024.2.26" | |
| jmespath = "^1.0.1" | |
| chromedriver-autoinstaller = "^0.6.4" | |
| # Feature engineering | |
| qdrant-client = "^1.8.0" | |
| langchain = "^0.2.11" | |
| sentence-transformers = "^3.0.0" | |
| # RAG | |
| langchain-openai = "^0.1.3" | |
| jinja2 = "^3.1.4" | |
| tiktoken = "^0.7.0" | |
| fake-useragent = "^1.5.1" | |
| langchain-community = "^0.2.11" | |
| # Inference | |
| # NOTE(review): the upper bound "<=0.110" admits 0.110.0 but no later 0.110.x patch release; confirm this cap is intended. | |
| fastapi = ">=0.100,<=0.110" | |
| uvicorn = "^0.30.6" | |
| opik = "^0.2.2" | |
| # Development-only dependency group: linter/formatter (ruff), git hooks (pre-commit), test runner (pytest) and notebook kernel (ipykernel). | |
| [tool.poetry.group.dev.dependencies] | |
| ruff = "^0.4.9" | |
| pre-commit = "^3.7.1" | |
| pytest = "^8.2.2" | |
| ipykernel = "^6.29.5" | |
| # Dependency group for the AWS / SageMaker tooling. | |
| # NOTE(review): sagemaker (">=") and s3fs (">") have no upper bound, so any future release can be resolved; confirm this is intended. | |
| [tool.poetry.group.aws.dependencies] | |
| sagemaker = ">=2.232.2" | |
| s3fs = ">2022.3.0" | |
| aws-profile-manager = "^0.7.3" | |
| kubernetes = "^30.1.0" | |
| sagemaker-huggingface-inference-toolkit = "^2.4.0" | |
| # Build backend declaration: the package is built with poetry-core. | |
| [build-system] | |
| build-backend = "poetry.core.masonry.api" | |
| requires = ["poetry-core"] | |
| # ---------------------------------- | |
| # --- Poe the Poet Configuration --- | |
| # ---------------------------------- | |
| # Poe the Poet task table. A string value is the command a task runs; an array value lists other tasks to run one after another. | |
| [tool.poe.tasks] | |
| # Data pipelines | |
| # The "alex" ETL task is kept only as a stub that prints a notice. | |
| run-digital-data-etl-alex = "echo 'It is not supported anymore.'" | |
| run-digital-data-etl-maxime = "poetry run python -m tools.run --run-etl --no-cache --etl-config-filename digital_data_etl_maxime_labonne.yaml" | |
| run-digital-data-etl-paul = "poetry run python -m tools.run --run-etl --no-cache --etl-config-filename digital_data_etl_paul_iusztin.yaml" | |
| # Aggregate task: runs the maxime and paul ETL tasks (the alex stub is not included). | |
| run-digital-data-etl = [ | |
| "run-digital-data-etl-maxime", | |
| "run-digital-data-etl-paul", | |
| ] | |
| # Each pipeline below goes through tools.run, selected by a --run-* flag, always with --no-cache. | |
| run-feature-engineering-pipeline = "poetry run python -m tools.run --no-cache --run-feature-engineering" | |
| run-generate-instruct-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-instruct-datasets" | |
| run-generate-preference-datasets-pipeline = "poetry run python -m tools.run --no-cache --run-generate-preference-datasets" | |
| run-end-to-end-data-pipeline = "poetry run python -m tools.run --no-cache --run-end-to-end-data" | |
| # Utility pipelines | |
| # Artifact export goes through tools.run; the data-warehouse export/import tasks use the separate tools.data_warehouse module. | |
| run-export-artifact-to-json-pipeline = "poetry run python -m tools.run --no-cache --run-export-artifact-to-json" | |
| run-export-data-warehouse-to-json = "poetry run python -m tools.data_warehouse --export-raw-data" | |
| run-import-data-warehouse-from-json = "poetry run python -m tools.data_warehouse --import-raw-data" | |
| # Training pipelines | |
| # Training and evaluation are both dispatched through tools.run with --no-cache. | |
| run-training-pipeline = "poetry run python -m tools.run --no-cache --run-training" | |
| run-evaluation-pipeline = "poetry run python -m tools.run --no-cache --run-evaluation" | |
| # Inference | |
| call-rag-retrieval-module = "poetry run python -m tools.rag" | |
| # Serves tools.ml_service:app with uvicorn on 0.0.0.0:8000, with --reload enabled. | |
| run-inference-ml-service = "poetry run uvicorn tools.ml_service:app --host 0.0.0.0 --port 8000 --reload" | |
| # Sends a sample JSON query to the /rag endpoint on 127.0.0.1:8000, the port used by run-inference-ml-service. | |
| call-inference-ml-service = "curl -X POST 'http://127.0.0.1:8000/rag' -H 'Content-Type: application/json' -d '{\"query\": \"My name is Paul Iusztin. Could you draft a LinkedIn post discussing RAG systems? I am particularly interested in how RAG works and how it is integrated with vector DBs and LLMs.\"}'" | |
| # Infrastructure | |
| ## Local infrastructure | |
| local-docker-infrastructure-up = "docker compose up -d" | |
| # Uses "docker compose stop", not "down". | |
| local-docker-infrastructure-down = "docker compose stop" | |
| local-zenml-server-down = "poetry run zenml logout --local" | |
| # Runs the server-down task before server-up. "local-zenml-server-up" is declared in its own | |
| # [tool.poe.tasks.local-zenml-server-up] table rather than as a key in this table. | |
| local-infrastructure-up = [ | |
| "local-docker-infrastructure-up", | |
| "local-zenml-server-down", | |
| "local-zenml-server-up", | |
| ] | |
| local-infrastructure-down = [ | |
| "local-docker-infrastructure-down", | |
| "local-zenml-server-down", | |
| ] | |
| # Stack selection: "default" for local runs, "aws-stack" for AWS. | |
| set-local-stack = "poetry run zenml stack set default" | |
| set-aws-stack = "poetry run zenml stack set aws-stack" | |
| set-asynchronous-runs = "poetry run zenml orchestrator update aws-stack --synchronous=False" | |
| # NOTE(review): the other server tasks use "zenml login" / "zenml logout"; confirm "zenml disconnect" still exists in the pinned ZenML version. | |
| zenml-server-disconnect = "poetry run zenml disconnect" | |
| ## Settings | |
| # Export goes through tools.run --export-settings; delete removes the ZenML secret named "settings". | |
| export-settings-to-zenml = "poetry run python -m tools.run --export-settings" | |
| delete-settings-zenml = "poetry run zenml secret delete settings" | |
| ## SageMaker | |
| # Each task runs a module from the llm_engineering package: role creation, endpoint deploy, endpoint test, endpoint delete. | |
| create-sagemaker-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_sagemaker_role" | |
| create-sagemaker-execution-role = "poetry run python -m llm_engineering.infrastructure.aws.roles.create_execution_role" | |
| deploy-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.huggingface.run" | |
| test-sagemaker-endpoint = "poetry run python -m llm_engineering.model.inference.test" | |
| delete-inference-endpoint = "poetry run python -m llm_engineering.infrastructure.aws.deploy.delete_sagemaker_endpoint" | |
| ## Docker | |
| # Builds the "llmtwin" image for linux/amd64 from the Dockerfile in the current directory. | |
| build-docker-image = "docker buildx build --platform linux/amd64 -t llmtwin -f Dockerfile ." | |
| # Runs the end-to-end data pipeline inside the image by invoking the poe task by name. | |
| # poe expects a task name here; --no-cache and --run-end-to-end-data are flags of tools.run, | |
| # and the run-end-to-end-data-pipeline task already passes them. | |
| run-docker-end-to-end-data-pipeline = "docker run --rm --network host --shm-size=2g --env-file .env llmtwin poetry poe run-end-to-end-data-pipeline" | |
| # Opens an interactive bash shell in the image with the same network and env-file settings. | |
| bash-docker-container = "docker run --rm -it --network host --env-file .env llmtwin bash" | |
| # QA | |
| # Read-only checks. | |
| lint-check = "poetry run ruff check ." | |
| format-check = "poetry run ruff format --check ." | |
| # Wrapped in "sh -c" because the Dockerfile is fed to hadolint through a shell redirect. | |
| lint-check-docker = "sh -c 'docker run --rm -i hadolint/hadolint < Dockerfile'" | |
| # Scans /src/llm_engineering with the current directory mounted at /src. | |
| # --rm matches the other docker run tasks, so no stopped container is left behind after each scan. | |
| gitleaks-check = "docker run --rm -v .:/src zricethezav/gitleaks:latest dir /src/llm_engineering" | |
| # Auto-fixing variants of the lint and format checks. | |
| lint-fix = "poetry run ruff check --fix ." | |
| format-fix = "poetry run ruff format ." | |
| # Switch task: the command is chosen by the value of sys.platform. | |
| [tool.poe.tasks.local-zenml-server-up] | |
| control = { expr = "sys.platform" } | |
| # darwin: same login command, with OBJC_DISABLE_INITIALIZE_FORK_SAFETY set in the task environment. | |
| [[tool.poe.tasks.local-zenml-server-up.switch]] | |
| cmd = "poetry run zenml login --local" | |
| case = "darwin" | |
| env.OBJC_DISABLE_INITIALIZE_FORK_SAFETY = "YES" | |
| # win32: the login command is run with --blocking. | |
| [[tool.poe.tasks.local-zenml-server-up.switch]] | |
| cmd = "poetry run zenml login --local --blocking" | |
| case = "win32" | |
| # No "case" key: used for every other platform. | |
| [[tool.poe.tasks.local-zenml-server-up.switch]] | |
| cmd = "poetry run zenml login --local" | |
| # Tests | |
| # Runs pytest on tests/ with ENV_FILE set to the testing env file. | |
| [tool.poe.tasks.test] | |
| env.ENV_FILE = ".env.testing" | |
| cmd = "poetry run pytest tests/" | |