Spaces:
Sleeping
Sleeping
feat: final
Browse files- .chainlit/config.toml +78 -0
- .env.sample +2 -0
- .gitignore +166 -0
- .vscode/settings.json +0 -1
- app/app.py +207 -0
- app/prompt.py +26 -0
- chainlit.md +8 -0
- requirements.txt +8 -0
- sample_pdf/NVDA 2QFY24.pdf +0 -0
.chainlit/config.toml
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
# Whether to enable telemetry (default: true). No personal data is collected.
|
| 3 |
+
enable_telemetry = true
|
| 4 |
+
|
| 5 |
+
# List of environment variables to be provided by each user to use the app.
|
| 6 |
+
user_env = []
|
| 7 |
+
|
| 8 |
+
# Duration (in seconds) during which the session is saved when the connection is lost
|
| 9 |
+
session_timeout = 3600
|
| 10 |
+
|
| 11 |
+
# Enable third-party caching (e.g. LangChain cache)
|
| 12 |
+
cache = false
|
| 13 |
+
|
| 14 |
+
# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
|
| 15 |
+
# follow_symlink = false
|
| 16 |
+
|
| 17 |
+
[features]
|
| 18 |
+
# Show the prompt playground
|
| 19 |
+
prompt_playground = true
|
| 20 |
+
|
| 21 |
+
# Authorize users to upload files with messages
|
| 22 |
+
multi_modal = true
|
| 23 |
+
|
| 24 |
+
# Allows user to use speech to text
|
| 25 |
+
[features.speech_to_text]
|
| 26 |
+
enabled = false
|
| 27 |
+
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
|
| 28 |
+
# language = "en-US"
|
| 29 |
+
|
| 30 |
+
[UI]
|
| 31 |
+
# Name of the app and chatbot.
|
| 32 |
+
name = "Chatbot"
|
| 33 |
+
|
| 34 |
+
# Show the readme while the conversation is empty.
|
| 35 |
+
show_readme_as_default = true
|
| 36 |
+
|
| 37 |
+
# Description of the app and chatbot. This is used for HTML tags.
|
| 38 |
+
# description = ""
|
| 39 |
+
|
| 40 |
+
# Large content is collapsed by default for a cleaner UI
|
| 41 |
+
default_collapse_content = true
|
| 42 |
+
|
| 43 |
+
# The default value for the expand messages settings.
|
| 44 |
+
default_expand_messages = false
|
| 45 |
+
|
| 46 |
+
# Hide the chain of thought details from the user in the UI.
|
| 47 |
+
hide_cot = false
|
| 48 |
+
|
| 49 |
+
# Link to your github repo. This will add a github button in the UI's header.
|
| 50 |
+
github = "https://github.com/LinkedInLearning/hands-on-ai-building-and-deploying-llm-powered-apps-4511409"
|
| 51 |
+
|
| 52 |
+
# Specify a CSS file that can be used to customize the user interface.
|
| 53 |
+
# The CSS file can be served from the public directory or via an external link.
|
| 54 |
+
# custom_css = "/public/test.css"
|
| 55 |
+
|
| 56 |
+
# Override default MUI light theme. (Check theme.ts)
|
| 57 |
+
[UI.theme.light]
|
| 58 |
+
#background = "#FAFAFA"
|
| 59 |
+
#paper = "#FFFFFF"
|
| 60 |
+
|
| 61 |
+
[UI.theme.light.primary]
|
| 62 |
+
#main = "#F80061"
|
| 63 |
+
#dark = "#980039"
|
| 64 |
+
#light = "#FFE7EB"
|
| 65 |
+
|
| 66 |
+
# Override default MUI dark theme. (Check theme.ts)
|
| 67 |
+
[UI.theme.dark]
|
| 68 |
+
#background = "#FAFAFA"
|
| 69 |
+
#paper = "#FFFFFF"
|
| 70 |
+
|
| 71 |
+
[UI.theme.dark.primary]
|
| 72 |
+
#main = "#F80061"
|
| 73 |
+
#dark = "#980039"
|
| 74 |
+
#light = "#FFE7EB"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
[meta]
|
| 78 |
+
generated_by = "0.7.501"
|
.env.sample
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ALLOW_RESET=TRUE
|
| 2 |
+
OPENAI_API_KEY="sk-your-openai-api-key"
|
.gitignore
CHANGED
|
@@ -1,4 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.DS_Store
|
| 2 |
node_modules
|
| 3 |
.tmp
|
| 4 |
npm-debug.log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ruff
|
| 2 |
+
.ruff_cache/
|
| 3 |
+
|
| 4 |
+
# Chainlit
|
| 5 |
+
.chainlit/.langchain.db
|
| 6 |
+
|
| 7 |
+
# Chroma
|
| 8 |
+
.chromadb/
|
| 9 |
+
|
| 10 |
.DS_Store
|
| 11 |
node_modules
|
| 12 |
.tmp
|
| 13 |
npm-debug.log
|
| 14 |
+
|
| 15 |
+
# VSCode
|
| 16 |
+
.vscode/
|
| 17 |
+
|
| 18 |
+
# Byte-compiled / optimized / DLL files
|
| 19 |
+
__pycache__/
|
| 20 |
+
*.py[cod]
|
| 21 |
+
*$py.class
|
| 22 |
+
|
| 23 |
+
# C extensions
|
| 24 |
+
*.so
|
| 25 |
+
|
| 26 |
+
# Distribution / packaging
|
| 27 |
+
.Python
|
| 28 |
+
build/
|
| 29 |
+
develop-eggs/
|
| 30 |
+
dist/
|
| 31 |
+
downloads/
|
| 32 |
+
eggs/
|
| 33 |
+
.eggs/
|
| 34 |
+
lib/
|
| 35 |
+
lib64/
|
| 36 |
+
parts/
|
| 37 |
+
sdist/
|
| 38 |
+
var/
|
| 39 |
+
wheels/
|
| 40 |
+
share/python-wheels/
|
| 41 |
+
*.egg-info/
|
| 42 |
+
.installed.cfg
|
| 43 |
+
*.egg
|
| 44 |
+
MANIFEST
|
| 45 |
+
|
| 46 |
+
# PyInstaller
|
| 47 |
+
# Usually these files are written by a python script from a template
|
| 48 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 49 |
+
*.manifest
|
| 50 |
+
*.spec
|
| 51 |
+
|
| 52 |
+
# Installer logs
|
| 53 |
+
pip-log.txt
|
| 54 |
+
pip-delete-this-directory.txt
|
| 55 |
+
|
| 56 |
+
# Unit test / coverage reports
|
| 57 |
+
htmlcov/
|
| 58 |
+
.tox/
|
| 59 |
+
.nox/
|
| 60 |
+
.coverage
|
| 61 |
+
.coverage.*
|
| 62 |
+
.cache
|
| 63 |
+
nosetests.xml
|
| 64 |
+
coverage.xml
|
| 65 |
+
*.cover
|
| 66 |
+
*.py,cover
|
| 67 |
+
.hypothesis/
|
| 68 |
+
.pytest_cache/
|
| 69 |
+
cover/
|
| 70 |
+
|
| 71 |
+
# Translations
|
| 72 |
+
*.mo
|
| 73 |
+
*.pot
|
| 74 |
+
|
| 75 |
+
# Django stuff:
|
| 76 |
+
*.log
|
| 77 |
+
local_settings.py
|
| 78 |
+
db.sqlite3
|
| 79 |
+
db.sqlite3-journal
|
| 80 |
+
|
| 81 |
+
# Flask stuff:
|
| 82 |
+
instance/
|
| 83 |
+
.webassets-cache
|
| 84 |
+
|
| 85 |
+
# Scrapy stuff:
|
| 86 |
+
.scrapy
|
| 87 |
+
|
| 88 |
+
# Sphinx documentation
|
| 89 |
+
docs/_build/
|
| 90 |
+
|
| 91 |
+
# PyBuilder
|
| 92 |
+
.pybuilder/
|
| 93 |
+
target/
|
| 94 |
+
|
| 95 |
+
# Jupyter Notebook
|
| 96 |
+
.ipynb_checkpoints
|
| 97 |
+
|
| 98 |
+
# IPython
|
| 99 |
+
profile_default/
|
| 100 |
+
ipython_config.py
|
| 101 |
+
|
| 102 |
+
# pyenv
|
| 103 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 104 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 105 |
+
# .python-version
|
| 106 |
+
|
| 107 |
+
# pipenv
|
| 108 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 109 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 110 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 111 |
+
# install all needed dependencies.
|
| 112 |
+
#Pipfile.lock
|
| 113 |
+
|
| 114 |
+
# poetry
|
| 115 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 116 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 117 |
+
# commonly ignored for libraries.
|
| 118 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 119 |
+
#poetry.lock
|
| 120 |
+
|
| 121 |
+
# pdm
|
| 122 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 123 |
+
#pdm.lock
|
| 124 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 125 |
+
# in version control.
|
| 126 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 127 |
+
.pdm.toml
|
| 128 |
+
|
| 129 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 130 |
+
__pypackages__/
|
| 131 |
+
|
| 132 |
+
# Celery stuff
|
| 133 |
+
celerybeat-schedule
|
| 134 |
+
celerybeat.pid
|
| 135 |
+
|
| 136 |
+
# SageMath parsed files
|
| 137 |
+
*.sage.py
|
| 138 |
+
|
| 139 |
+
# Environments
|
| 140 |
+
.env
|
| 141 |
+
.venv
|
| 142 |
+
env/
|
| 143 |
+
venv/
|
| 144 |
+
ENV/
|
| 145 |
+
env.bak/
|
| 146 |
+
venv.bak/
|
| 147 |
+
|
| 148 |
+
# Spyder project settings
|
| 149 |
+
.spyderproject
|
| 150 |
+
.spyproject
|
| 151 |
+
|
| 152 |
+
# Rope project settings
|
| 153 |
+
.ropeproject
|
| 154 |
+
|
| 155 |
+
# mkdocs documentation
|
| 156 |
+
/site
|
| 157 |
+
|
| 158 |
+
# mypy
|
| 159 |
+
.mypy_cache/
|
| 160 |
+
.dmypy.json
|
| 161 |
+
dmypy.json
|
| 162 |
+
|
| 163 |
+
# Pyre type checker
|
| 164 |
+
.pyre/
|
| 165 |
+
|
| 166 |
+
# pytype static type analyzer
|
| 167 |
+
.pytype/
|
| 168 |
+
|
| 169 |
+
# Cython debug symbols
|
| 170 |
+
cython_debug/
|
.vscode/settings.json
CHANGED
|
@@ -17,7 +17,6 @@
|
|
| 17 |
"files.autoSave": "afterDelay",
|
| 18 |
"screencastMode.onlyKeyboardShortcuts": true,
|
| 19 |
"terminal.integrated.fontSize": 18,
|
| 20 |
-
"workbench.activityBar.visible": true,
|
| 21 |
"workbench.colorTheme": "Visual Studio Dark",
|
| 22 |
"workbench.fontAliasing": "antialiased",
|
| 23 |
"workbench.statusBar.visible": true
|
|
|
|
| 17 |
"files.autoSave": "afterDelay",
|
| 18 |
"screencastMode.onlyKeyboardShortcuts": true,
|
| 19 |
"terminal.integrated.fontSize": 18,
|
|
|
|
| 20 |
"workbench.colorTheme": "Visual Studio Dark",
|
| 21 |
"workbench.fontAliasing": "antialiased",
|
| 22 |
"workbench.statusBar.visible": true
|
app/app.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Chroma compatibility issue resolution
|
| 2 |
+
# https://docs.trychroma.com/troubleshooting#sqlite
|
| 3 |
+
__import__('pysqlite3')
|
| 4 |
+
import sys
|
| 5 |
+
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
|
| 6 |
+
|
| 7 |
+
from tempfile import NamedTemporaryFile
|
| 8 |
+
|
| 9 |
+
import chainlit as cl
|
| 10 |
+
from chainlit.types import AskFileResponse
|
| 11 |
+
|
| 12 |
+
import chromadb
|
| 13 |
+
from chromadb.config import Settings
|
| 14 |
+
from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
|
| 15 |
+
from langchain.chains.base import Chain
|
| 16 |
+
from langchain.chat_models import ChatOpenAI
|
| 17 |
+
from langchain.document_loaders import PDFPlumberLoader
|
| 18 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 19 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 20 |
+
from langchain.vectorstores import Chroma
|
| 21 |
+
from langchain.vectorstores.base import VectorStore
|
| 22 |
+
|
| 23 |
+
from prompt import EXAMPLE_PROMPT, PROMPT, WELCOME_MESSAGE
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# NOTE(review): not referenced anywhere else in the visible code; possibly a
# leftover — confirm before removing.
namespaces = set()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def process_file(*, file: AskFileResponse) -> list:
    """Load an uploaded PDF and split it into chunked documents.

    Args:
        file: The uploaded file. ``file.type`` must be ``"application/pdf"``
            and ``file.content`` is written to a temporary file for parsing.

    Returns:
        The list of document chunks. Each chunk's ``metadata["source"]`` is
        set to ``"source_<index>"`` so that answers can cite individual chunks.

    Raises:
        TypeError: If the file is not a PDF.
        ValueError: If the PDF produced no chunks at all.
    """
    if file.type != "application/pdf":
        raise TypeError("Only PDF files are supported")

    with NamedTemporaryFile() as tempfile:
        tempfile.write(file.content)
        # The loader re-opens the file by name, so any bytes still sitting in
        # the write buffer must be pushed to disk before it reads them.
        tempfile.flush()

        # 1. Load the PDF
        loader = PDFPlumberLoader(tempfile.name)
        documents = loader.load()

    # 2. Split the text
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=100,
    )
    docs = text_splitter.split_documents(documents)

    if not docs:
        raise ValueError("PDF file parsing failed.")

    # Give every chunk a unique, citable source label
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"

    return docs
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def create_search_engine(*, file: AskFileResponse) -> VectorStore:
    """Build a Chroma vector store from the chunks of an uploaded PDF.

    The chunks returned by ``process_file`` are also saved in the user
    session under the key ``"docs"``.

    Args:
        file: The uploaded PDF file.

    Returns:
        A Chroma vector store populated with the embedded chunks.

    Raises:
        TypeError, ValueError: Propagated from ``process_file``.
    """
    # Process and save data in the user session
    docs = process_file(file=file)
    cl.user_session.set("docs", docs)

    # 3. Set the Encoder model for creating embeddings
    encoder = OpenAIEmbeddings(model="text-embedding-ada-002")

    # Initialize Chromadb client and settings
    client = chromadb.EphemeralClient()
    client_settings = Settings(
        allow_reset=True,
        anonymized_telemetry=False,
    )

    # Reset to ensure we get a clean search engine. This is called directly on
    # the client rather than through a throwaway Chroma wrapper's private
    # `_client` attribute, which is this same object.
    # NOTE(review): `client` is created without explicit settings; presumably
    # reset is permitted via the ALLOW_RESET environment variable (see
    # .env.sample) — confirm.
    # NOTE(review): if this client is shared between chat sessions, reset()
    # would presumably wipe other sessions' data as well — confirm.
    client.reset()

    # 4. Create the document search engine using the settings above
    search_engine = Chroma.from_documents(
        client=client,
        documents=docs,
        embedding=encoder,
        client_settings=client_settings,
    )

    return search_engine
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@cl.on_chat_start
async def start():
    """Chat-start handler: ask for a PDF, index it, and store the QA chain.

    Keeps prompting until a file is provided, builds the search engine in a
    worker thread, and saves the resulting chain in the user session under
    the key ``"chain"``.

    Raises:
        SystemError: If building the search engine fails; the error text is
            sent to the user first.
    """
    # Re-ask until the upload prompt yields something other than None
    while True:
        files = await cl.AskFileMessage(
            content=WELCOME_MESSAGE,
            accept=["application/pdf"],
            max_size_mb=20,
        ).send()
        if files is not None:
            break

    file = files[0]
    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    build_search_engine = cl.make_async(create_search_engine)
    try:
        search_engine = await build_search_engine(file=file)
    except Exception as e:
        await cl.Message(content=f"Error: {e}").send()
        raise SystemError

    llm = ChatOpenAI(
        model='gpt-3.5-turbo-16k-0613',
        temperature=0,
        streaming=True,
    )

    # NOTE(review): max_tokens_limit is handed to as_retriever(); confirm the
    # retriever actually honors it.
    retriever = search_engine.as_retriever(max_tokens_limit=4097)

    # 5. Create the chain / tool for RetrievalQAWithSourcesChain.
    # 6. Custom prompts (defined in prompt.py) are supplied via
    #    chain_type_kwargs.
    prompt_kwargs = {
        "prompt": PROMPT,
        "document_prompt": EXAMPLE_PROMPT,
    }
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs=prompt_kwargs,
    )

    # Turn the "Processing..." message into a ready notice
    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the stored chain and attach cited chunks.

    The chain's ``"sources"`` output is split on commas. Each name that
    matches a chunk saved in the session is attached as a text element and
    listed at the end of the answer.
    """
    # The chain saved by start() is a RetrievalQAWithSourcesChain
    chain = cl.user_session.get("chain")
    handler = cl.AsyncLangchainCallbackHandler()
    response = await chain.acall(message.content, callbacks=[handler])

    answer = response["answer"]
    sources = response["sources"].strip()
    source_elements = []

    # Map each source label to its chunk; the first chunk wins on duplicates
    docs = cl.user_session.get("docs")
    chunk_by_source = {}
    for doc in docs:
        chunk_by_source.setdefault(doc.metadata["source"], doc)

    # Adding sources to the answer
    if sources:
        found_sources = []

        for raw_name in sources.split(","):
            source_name = raw_name.strip().replace(".", "")
            chunk = chunk_by_source.get(source_name)
            if chunk is None:
                # The model cited something that is not one of our chunks
                continue
            found_sources.append(source_name)
            # Create the text element referenced in the message
            source_elements.append(
                cl.Text(content=chunk.page_content, name=source_name)
            )

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=source_elements).send()
|
app/prompt.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa
|
| 2 |
+
from langchain.prompts import PromptTemplate
|
| 3 |
+
|
| 4 |
+
# Greeting used as the content of the file-upload prompt at chat start.
# The upload prompt accepts only "application/pdf", so the text must not
# advertise other file types.
WELCOME_MESSAGE = """\
Welcome to Introduction to LLM App Development Sample PDF QA Application!
To get started:
1. Upload a PDF file
2. Ask any question about the file!
"""
|
| 10 |
+
|
| 11 |
+
# Question-answering prompt: receives the question plus the formatted chunks
# ({summaries}) and instructs the model to always end with a SOURCES field.
template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" field in your answer, with the format "SOURCES: <source1>, <source2>, <source3>, ...".

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:"""

PROMPT = PromptTemplate(
    input_variables=["summaries", "question"],
    template=template,
)

# Per-chunk prompt: renders one chunk's text together with its source label.
EXAMPLE_PROMPT = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Content: {page_content}\nSource: {source}",
)
|
chainlit.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Welcome to your PDF QA Sample Application! 🚀🤖
|
| 2 |
+
|
| 3 |
+
Hi Team! 👋 Congratulations on launching your first LLM Application. This application is built using OpenAI, Langchain, Chainlit, and Chroma. The goal of this application is to provide a quick overview of the most basic archetype of LLM application and the prototyping and debugging environment.
|
| 4 |
+
|
| 5 |
+
## Useful Links 🔗
|
| 6 |
+
|
| 7 |
+
- **Langchain Documentation:** Get started with [Langchain Documentation](https://python.langchain.com/) 📚
|
| 8 |
+
- **Chainlit Documentation:** Get started with [Chainlit Documentation](https://docs.chainlit.io) 📚
|
requirements.txt
CHANGED
|
@@ -1 +1,9 @@
|
|
| 1 |
# Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it.
|
| 2 |
+
openai==1.2.3
|
| 3 |
+
langchain==0.0.334
|
| 4 |
+
chainlit==0.7.501
|
| 5 |
+
tiktoken==0.5.1
|
| 6 |
+
pdfplumber==0.10.3
|
| 7 |
+
chromadb==0.4.17
|
| 8 |
+
pysqlite3-binary==0.5.2.post1
|
| 9 |
+
ruff==0.1.5
|
sample_pdf/NVDA 2QFY24.pdf
ADDED
|
Binary file (85.3 kB). View file
|
|
|