Spaces:
Running
on
A10G
Running
on
A10G
Oleg Shulyakov
committed on
Commit
·
b7bd975
1
Parent(s):
05d1b68
Migrate Docker to official llama.cpp CUDA image
Browse files
- .dockerignore +15 -3
- .gitignore +203 -8
- Dockerfile +14 -49
- docker-compose.yml +4 -4
- requirements.txt +5 -0
- start.sh +3 -15
.dockerignore
CHANGED
|
@@ -1,3 +1,15 @@
|
|
| 1 |
-
|
| 2 |
-
/
|
| 3 |
-
/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IDE
|
| 2 |
+
.idea/
|
| 3 |
+
.vscode/
|
| 4 |
+
|
| 5 |
+
.git*
|
| 6 |
+
.dockerignore
|
| 7 |
+
docker-compose.yml
|
| 8 |
+
Dockerfile
|
| 9 |
+
|
| 10 |
+
# llama.cpp
|
| 11 |
+
llama.cpp/
|
| 12 |
+
|
| 13 |
+
# Working files
|
| 14 |
+
downloads/
|
| 15 |
+
outputs/
|
.gitignore
CHANGED
|
@@ -1,3 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Byte-compiled / optimized / DLL files
|
| 2 |
__pycache__/
|
| 3 |
*.py[cod]
|
|
@@ -11,7 +150,6 @@ __pycache__/
|
|
| 11 |
build/
|
| 12 |
develop-eggs/
|
| 13 |
dist/
|
| 14 |
-
downloads/
|
| 15 |
eggs/
|
| 16 |
.eggs/
|
| 17 |
lib/
|
|
@@ -106,10 +244,8 @@ ipython_config.py
|
|
| 106 |
#pdm.lock
|
| 107 |
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
# in version control.
|
| 109 |
-
# https://pdm.fming.dev
|
| 110 |
.pdm.toml
|
| 111 |
-
.pdm-python
|
| 112 |
-
.pdm-build/
|
| 113 |
|
| 114 |
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 115 |
__pypackages__/
|
|
@@ -161,7 +297,66 @@ cython_debug/
|
|
| 161 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 162 |
#.idea/
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
|
| 2 |
+
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows,python,jetbrains+all,visualstudiocode
|
| 3 |
+
|
| 4 |
+
### JetBrains+all ###
|
| 5 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
| 6 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
| 7 |
+
|
| 8 |
+
# User-specific stuff
|
| 9 |
+
.idea/**/workspace.xml
|
| 10 |
+
.idea/**/tasks.xml
|
| 11 |
+
.idea/**/usage.statistics.xml
|
| 12 |
+
.idea/**/dictionaries
|
| 13 |
+
.idea/**/shelf
|
| 14 |
+
|
| 15 |
+
# AWS User-specific
|
| 16 |
+
.idea/**/aws.xml
|
| 17 |
+
|
| 18 |
+
# Generated files
|
| 19 |
+
.idea/**/contentModel.xml
|
| 20 |
+
|
| 21 |
+
# Sensitive or high-churn files
|
| 22 |
+
.idea/**/dataSources/
|
| 23 |
+
.idea/**/dataSources.ids
|
| 24 |
+
.idea/**/dataSources.local.xml
|
| 25 |
+
.idea/**/sqlDataSources.xml
|
| 26 |
+
.idea/**/dynamic.xml
|
| 27 |
+
.idea/**/uiDesigner.xml
|
| 28 |
+
.idea/**/dbnavigator.xml
|
| 29 |
+
|
| 30 |
+
# Gradle
|
| 31 |
+
.idea/**/gradle.xml
|
| 32 |
+
.idea/**/libraries
|
| 33 |
+
|
| 34 |
+
# Gradle and Maven with auto-import
|
| 35 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
| 36 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
| 37 |
+
# auto-import.
|
| 38 |
+
# .idea/artifacts
|
| 39 |
+
# .idea/compiler.xml
|
| 40 |
+
# .idea/jarRepositories.xml
|
| 41 |
+
# .idea/modules.xml
|
| 42 |
+
# .idea/*.iml
|
| 43 |
+
# .idea/modules
|
| 44 |
+
# *.iml
|
| 45 |
+
# *.ipr
|
| 46 |
+
|
| 47 |
+
# CMake
|
| 48 |
+
cmake-build-*/
|
| 49 |
+
|
| 50 |
+
# Mongo Explorer plugin
|
| 51 |
+
.idea/**/mongoSettings.xml
|
| 52 |
+
|
| 53 |
+
# File-based project format
|
| 54 |
+
*.iws
|
| 55 |
+
|
| 56 |
+
# IntelliJ
|
| 57 |
+
out/
|
| 58 |
+
|
| 59 |
+
# mpeltonen/sbt-idea plugin
|
| 60 |
+
.idea_modules/
|
| 61 |
+
|
| 62 |
+
# JIRA plugin
|
| 63 |
+
atlassian-ide-plugin.xml
|
| 64 |
+
|
| 65 |
+
# Cursive Clojure plugin
|
| 66 |
+
.idea/replstate.xml
|
| 67 |
+
|
| 68 |
+
# SonarLint plugin
|
| 69 |
+
.idea/sonarlint/
|
| 70 |
+
|
| 71 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
| 72 |
+
com_crashlytics_export_strings.xml
|
| 73 |
+
crashlytics.properties
|
| 74 |
+
crashlytics-build.properties
|
| 75 |
+
fabric.properties
|
| 76 |
+
|
| 77 |
+
# Editor-based Rest Client
|
| 78 |
+
.idea/httpRequests
|
| 79 |
+
|
| 80 |
+
# Android studio 3.1+ serialized cache file
|
| 81 |
+
.idea/caches/build_file_checksums.ser
|
| 82 |
+
|
| 83 |
+
### JetBrains+all Patch ###
|
| 84 |
+
# Ignore everything but code style settings and run configurations
|
| 85 |
+
# that are supposed to be shared within teams.
|
| 86 |
+
|
| 87 |
+
.idea/*
|
| 88 |
+
|
| 89 |
+
!.idea/codeStyles
|
| 90 |
+
!.idea/runConfigurations
|
| 91 |
+
|
| 92 |
+
### Linux ###
|
| 93 |
+
*~
|
| 94 |
+
|
| 95 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
| 96 |
+
.fuse_hidden*
|
| 97 |
+
|
| 98 |
+
# KDE directory preferences
|
| 99 |
+
.directory
|
| 100 |
+
|
| 101 |
+
# Linux trash folder which might appear on any partition or disk
|
| 102 |
+
.Trash-*
|
| 103 |
+
|
| 104 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
| 105 |
+
.nfs*
|
| 106 |
+
|
| 107 |
+
### macOS ###
|
| 108 |
+
# General
|
| 109 |
+
.DS_Store
|
| 110 |
+
.AppleDouble
|
| 111 |
+
.LSOverride
|
| 112 |
+
|
| 113 |
+
# Icon must end with two \r
|
| 114 |
+
Icon
|
| 115 |
+
|
| 116 |
+
# Thumbnails
|
| 117 |
+
._*
|
| 118 |
+
|
| 119 |
+
# Files that might appear in the root of a volume
|
| 120 |
+
.DocumentRevisions-V100
|
| 121 |
+
.fseventsd
|
| 122 |
+
.Spotlight-V100
|
| 123 |
+
.TemporaryItems
|
| 124 |
+
.Trashes
|
| 125 |
+
.VolumeIcon.icns
|
| 126 |
+
.com.apple.timemachine.donotpresent
|
| 127 |
+
|
| 128 |
+
# Directories potentially created on remote AFP share
|
| 129 |
+
.AppleDB
|
| 130 |
+
.AppleDesktop
|
| 131 |
+
Network Trash Folder
|
| 132 |
+
Temporary Items
|
| 133 |
+
.apdisk
|
| 134 |
+
|
| 135 |
+
### macOS Patch ###
|
| 136 |
+
# iCloud generated files
|
| 137 |
+
*.icloud
|
| 138 |
+
|
| 139 |
+
### Python ###
|
| 140 |
# Byte-compiled / optimized / DLL files
|
| 141 |
__pycache__/
|
| 142 |
*.py[cod]
|
|
|
|
| 150 |
build/
|
| 151 |
develop-eggs/
|
| 152 |
dist/
|
|
|
|
| 153 |
eggs/
|
| 154 |
.eggs/
|
| 155 |
lib/
|
|
|
|
| 244 |
#pdm.lock
|
| 245 |
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 246 |
# in version control.
|
| 247 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 248 |
.pdm.toml
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 251 |
__pypackages__/
|
|
|
|
| 297 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 298 |
#.idea/
|
| 299 |
|
| 300 |
+
### Python Patch ###
|
| 301 |
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
| 302 |
+
poetry.toml
|
| 303 |
+
|
| 304 |
+
# ruff
|
| 305 |
+
.ruff_cache/
|
| 306 |
+
|
| 307 |
+
# LSP config files
|
| 308 |
+
pyrightconfig.json
|
| 309 |
+
|
| 310 |
+
### VisualStudioCode ###
|
| 311 |
+
.vscode/*
|
| 312 |
+
!.vscode/settings.json
|
| 313 |
+
!.vscode/tasks.json
|
| 314 |
+
!.vscode/launch.json
|
| 315 |
+
!.vscode/extensions.json
|
| 316 |
+
!.vscode/*.code-snippets
|
| 317 |
+
|
| 318 |
+
# Local History for Visual Studio Code
|
| 319 |
+
.history/
|
| 320 |
+
|
| 321 |
+
# Built Visual Studio Code Extensions
|
| 322 |
+
*.vsix
|
| 323 |
+
|
| 324 |
+
### VisualStudioCode Patch ###
|
| 325 |
+
# Ignore all local history of files
|
| 326 |
+
.history
|
| 327 |
+
.ionide
|
| 328 |
+
|
| 329 |
+
### Windows ###
|
| 330 |
+
# Windows thumbnail cache files
|
| 331 |
+
Thumbs.db
|
| 332 |
+
Thumbs.db:encryptable
|
| 333 |
+
ehthumbs.db
|
| 334 |
+
ehthumbs_vista.db
|
| 335 |
+
|
| 336 |
+
# Dump file
|
| 337 |
+
*.stackdump
|
| 338 |
+
|
| 339 |
+
# Folder config file
|
| 340 |
+
[Dd]esktop.ini
|
| 341 |
+
|
| 342 |
+
# Recycle Bin used on file shares
|
| 343 |
+
$RECYCLE.BIN/
|
| 344 |
+
|
| 345 |
+
# Windows Installer files
|
| 346 |
+
*.cab
|
| 347 |
+
*.msi
|
| 348 |
+
*.msix
|
| 349 |
+
*.msm
|
| 350 |
+
*.msp
|
| 351 |
+
|
| 352 |
+
# Windows shortcuts
|
| 353 |
+
*.lnk
|
| 354 |
+
|
| 355 |
+
# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows,python,jetbrains+all,visualstudiocode
|
| 356 |
+
|
| 357 |
+
# Working folders
|
| 358 |
+
downloads/
|
| 359 |
+
outputs/
|
| 360 |
+
llama.cpp/
|
| 361 |
+
|
| 362 |
+
!*/.keep
|
Dockerfile
CHANGED
|
@@ -1,65 +1,30 @@
|
|
| 1 |
-
FROM
|
| 2 |
-
|
| 3 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
| 4 |
-
RUN apt-get update && \
|
| 5 |
-
apt-get upgrade -y && \
|
| 6 |
-
apt-get install -y --no-install-recommends --fix-missing \
|
| 7 |
-
git \
|
| 8 |
-
git-lfs \
|
| 9 |
-
wget \
|
| 10 |
-
curl \
|
| 11 |
-
cmake \
|
| 12 |
-
# python build dependencies \
|
| 13 |
-
build-essential \
|
| 14 |
-
libssl-dev \
|
| 15 |
-
zlib1g-dev \
|
| 16 |
-
libbz2-dev \
|
| 17 |
-
libreadline-dev \
|
| 18 |
-
libsqlite3-dev \
|
| 19 |
-
libncursesw5-dev \
|
| 20 |
-
xz-utils \
|
| 21 |
-
tk-dev \
|
| 22 |
-
libxml2-dev \
|
| 23 |
-
libxmlsec1-dev \
|
| 24 |
-
libffi-dev \
|
| 25 |
-
liblzma-dev \
|
| 26 |
-
ffmpeg \
|
| 27 |
-
nvidia-driver-570
|
| 28 |
|
| 29 |
# Check if user with UID 1000 exists, if not create it
|
| 30 |
RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
|
| 31 |
USER 1000
|
|
|
|
| 32 |
ENV HOME=/home/user \
|
| 33 |
-
PATH
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
|
| 38 |
-
ARG PYTHON_VERSION=3.11
|
| 39 |
-
RUN pyenv install ${PYTHON_VERSION} && \
|
| 40 |
-
pyenv global ${PYTHON_VERSION} && \
|
| 41 |
-
pyenv rehash && \
|
| 42 |
-
pip install --no-cache-dir -U pip setuptools wheel && \
|
| 43 |
-
pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search" "APScheduler"
|
| 44 |
|
| 45 |
-
COPY --chown=1000 . ${HOME}/app
|
| 46 |
-
RUN git clone https://github.com/ggerganov/llama.cpp
|
| 47 |
-
RUN pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
|
| 48 |
|
| 49 |
-
|
| 50 |
|
| 51 |
-
ENV PYTHONPATH=${HOME}/
|
| 52 |
-
|
|
|
|
| 53 |
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
| 54 |
GRADIO_ALLOW_FLAGGING=never \
|
| 55 |
GRADIO_NUM_PORTS=1 \
|
| 56 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
| 57 |
GRADIO_THEME=huggingface \
|
| 58 |
-
|
| 59 |
-
TQDM_MININTERVAL=1 \
|
| 60 |
-
SYSTEM=spaces \
|
| 61 |
-
LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
|
| 62 |
-
PATH=/usr/local/nvidia/bin:${PATH}
|
| 63 |
|
| 64 |
-
|
| 65 |
|
|
|
|
|
|
| 1 |
+
FROM ghcr.io/ggml-org/llama.cpp:full-cuda
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
# Check if user with UID 1000 exists, if not create it
|
| 4 |
RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
|
| 5 |
USER 1000
|
| 6 |
+
|
| 7 |
ENV HOME=/home/user \
|
| 8 |
+
PATH=${PATH}:/home/user/.local/bin \
|
| 9 |
+
PATH=${PATH}:/app \
|
| 10 |
+
PATH=${PATH}:/usr/local/nvidia/bin
|
| 11 |
|
| 12 |
+
WORKDIR ${HOME}/app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
COPY --chown=1000 requirements.txt ${HOME}/app
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
|
| 18 |
+
ENV PYTHONPATH=${PYTHONPATH}:${HOME}/.local/bin \
|
| 19 |
+
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/app \
|
| 20 |
+
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 \
|
| 21 |
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
| 22 |
GRADIO_ALLOW_FLAGGING=never \
|
| 23 |
GRADIO_NUM_PORTS=1 \
|
| 24 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
| 25 |
GRADIO_THEME=huggingface \
|
| 26 |
+
SYSTEM=spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
COPY --chown=1000 . ${HOME}/app
|
| 29 |
|
| 30 |
+
ENTRYPOINT ["/bin/bash", "start.sh"]
|
docker-compose.yml
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
-
# Docker compose file to LOCAL development
|
| 2 |
-
|
| 3 |
services:
|
| 4 |
gguf-my-repo:
|
| 5 |
build:
|
| 6 |
context: .
|
| 7 |
dockerfile: Dockerfile
|
| 8 |
-
image: gguf-my-repo
|
| 9 |
container_name: gguf-my-repo
|
| 10 |
ports:
|
| 11 |
- "7860:7860"
|
| 12 |
volumes:
|
| 13 |
- .:/home/user/app
|
| 14 |
environment:
|
| 15 |
-
-
|
|
|
|
| 16 |
- HF_TOKEN=${HF_TOKEN}
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
services:
|
| 2 |
gguf-my-repo:
|
| 3 |
build:
|
| 4 |
context: .
|
| 5 |
dockerfile: Dockerfile
|
| 6 |
+
image: gguf-my-repo-cuda
|
| 7 |
container_name: gguf-my-repo
|
| 8 |
ports:
|
| 9 |
- "7860:7860"
|
| 10 |
volumes:
|
| 11 |
- .:/home/user/app
|
| 12 |
environment:
|
| 13 |
+
- RUN_CUDA=1
|
| 14 |
+
- RUN_LOCALLY=0
|
| 15 |
- HF_TOKEN=${HF_TOKEN}
|
| 16 |
+
- HF_HUB_CACHE=/home/user/app/downloads
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
huggingface-hub
|
| 2 |
+
hf-transfer
|
| 3 |
+
gradio[oauth]
|
| 4 |
+
gradio_huggingfacehub_search
|
| 5 |
+
APScheduler
|
start.sh
CHANGED
|
@@ -1,21 +1,9 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
-
if [ ! -d "llama.cpp" ]; then
|
| 4 |
-
# only run in dev env
|
| 5 |
-
git clone https://github.com/ggerganov/llama.cpp
|
| 6 |
-
fi
|
| 7 |
-
|
| 8 |
export GGML_CUDA=OFF
|
| 9 |
-
|
| 10 |
-
|
| 11 |
export GGML_CUDA=ON
|
| 12 |
fi
|
| 13 |
|
| 14 |
-
|
| 15 |
-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} -DLLAMA_CURL=OFF
|
| 16 |
-
cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
|
| 17 |
-
cp ./build/bin/llama-* .
|
| 18 |
-
rm -rf build
|
| 19 |
-
|
| 20 |
-
cd ..
|
| 21 |
-
python app.py
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
export GGML_CUDA=OFF
|
| 4 |
+
# enable CUDA
|
| 5 |
+
if [[ -z "${RUN_CUDA}" ]]; then
|
| 6 |
export GGML_CUDA=ON
|
| 7 |
fi
|
| 8 |
|
| 9 |
+
python3 app.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|