Spaces:
Configuration error
Configuration error
File size: 4,543 Bytes
2b67076 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
#!/usr/bin/env bash
# Entrypoint environment. Everything exported here is inherited by the
# commands this script runs afterwards.

# Home directory, unbuffered Python output, and the Hugging Face cache path.
export HOME=/home/user
export HF_HOME=/home/user/.cache/huggingface
export PYTHONUNBUFFERED=1

# Give every *_NUM_THREADS variable the CPU count reported by nproc.
cpu_count=$(nproc)
for thread_var in OMP_NUM_THREADS MKL_NUM_THREADS OPENBLAS_NUM_THREADS NUMEXPR_NUM_THREADS; do
  export "${thread_var}=${cpu_count}"
done
unset cpu_count thread_var

# TF32 switches (names suggest cuBLAS/cuDNN TF32 opt-in; consumer not visible
# here - TODO confirm which component reads them).
export TORCH_ALLOW_TF32_CUBLAS=1
export TORCH_ALLOW_TF32_CUDNN=1

# Disable audio warnings in Docker
export SDL_AUDIODRIVER=dummy
export PULSE_RUNTIME_PATH=/tmp/pulse-runtime
# ═══════════════════════════ CUDA DEBUG CHECKS ═══════════════════════════
echo "🔍 CUDA Environment Debug Information:"
echo "═══════════════════════════════════════════════════════════════════════"
# Check CUDA driver on host (if accessible)
if command -v nvidia-smi >/dev/null 2>&1; then
  echo "✅ nvidia-smi available"
  echo "📊 GPU Information:"
  nvidia-smi --query-gpu=name,driver_version,memory.total,memory.free \
    --format=csv,noheader,nounits 2>/dev/null \
    || echo "❌ nvidia-smi failed to query GPU"
  echo "🔄 Running Processes:"
  # Capture the listing so the notice is printed both when the query fails
  # and when it succeeds but returns no rows.
  if compute_apps=$(nvidia-smi --query-compute-apps=pid,name,used_memory \
    --format=csv,noheader,nounits 2>/dev/null) && [[ -n "$compute_apps" ]]; then
    printf '%s\n' "$compute_apps"
  else
    echo "ℹ️ No running CUDA processes"
  fi
  unset compute_apps
else
  echo "❌ nvidia-smi not available in container"
fi
# Check CUDA runtime libraries
echo ""
echo "🔧 CUDA Runtime Check:"
# ls exits non-zero when the glob matches nothing, which selects the else arm.
if ls /usr/local/cuda*/lib*/libcudart.so* >/dev/null 2>&1; then
  echo "✅ CUDA runtime libraries found:"
  ls /usr/local/cuda*/lib*/libcudart.so* 2>/dev/null
else
  echo "❌ CUDA runtime libraries not found"
fi
# Check CUDA devices
echo ""
echo "🖥️ CUDA Device Files:"
# ls exits non-zero when no /dev/nvidia* entry exists, which selects the else arm.
if ls /dev/nvidia* >/dev/null 2>&1; then
  echo "✅ NVIDIA device files found:"
  ls -la /dev/nvidia* 2>/dev/null
else
  echo "❌ No NVIDIA device files found - Docker may not have GPU access"
fi
# Check CUDA environment variables
echo ""
echo "🌍 CUDA Environment Variables:"
# Print each variable's value, or "not set" when it is unset or empty.
for cuda_var in CUDA_HOME CUDA_ROOT CUDA_PATH LD_LIBRARY_PATH TORCH_CUDA_ARCH_LIST CUDA_VISIBLE_DEVICES; do
  # ${!cuda_var} is indirect expansion: the value of the variable whose name
  # is stored in cuda_var.
  echo " ${cuda_var}: ${!cuda_var:-not set}"
done
unset cuda_var
# Check PyTorch CUDA availability
echo ""
echo "🐍 PyTorch CUDA Check:"
# The probe lives in a quoted here-document so the shell expands nothing in it
# and the Python indentation survives verbatim. `read -d ''` returns non-zero
# when it reaches end of input, hence the `|| true`.
IFS= read -r -d '' pytorch_probe <<'PYCODE' || true
import sys
try:
    import torch
    print('✅ PyTorch imported successfully')
    print(f' Version: {torch.__version__}')
    print(f' CUDA available: {torch.cuda.is_available()}')
    if torch.cuda.is_available():
        print(f' CUDA version: {torch.version.cuda}')
        print(f' cuDNN version: {torch.backends.cudnn.version()}')
        print(f' Device count: {torch.cuda.device_count()}')
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            print(f' Device {i}: {props.name} (SM {props.major}.{props.minor}, {props.total_memory//1024//1024}MB)')
    else:
        print('❌ CUDA not available to PyTorch')
        print(' This could mean:')
        print(' - CUDA runtime not properly installed')
        print(' - GPU not accessible to container')
        print(' - Driver/runtime version mismatch')
except ImportError as e:
    print(f'❌ Failed to import PyTorch: {e}')
except Exception as e:
    print(f'❌ PyTorch CUDA check failed: {e}')
PYCODE
# stderr is merged into stdout so interpreter errors appear inline with the report.
python3 -c "$pytorch_probe" 2>&1
# Check for common CUDA issues
echo ""
echo "🩺 Common Issue Diagnostics:"
# Check if running with proper Docker flags
if [[ ! -e /dev/nvidia0 && ! -e /dev/nvidiactl ]]; then
  echo "❌ No NVIDIA device nodes - container likely missing --gpus all or --runtime=nvidia"
fi
# Check CUDA library paths: warn when the variable is unset, empty, or does
# not contain the substring "cuda".
if [[ "${LD_LIBRARY_PATH:-}" != *cuda* ]]; then
  echo "⚠️ LD_LIBRARY_PATH may not include CUDA libraries"
fi
# Check permissions on device files
if ls /dev/nvidia* >/dev/null 2>&1; then
  # Warn only when no listed entry shows rw-rw-rw- or rw-r--r-- permissions.
  # -E gives portable alternation instead of the GNU-only "\|" in basic regex.
  if ! ls -la /dev/nvidia* | grep -Eq 'rw-rw-rw-|rw-r--r--'; then
    echo "⚠️ NVIDIA device files may have restrictive permissions"
  fi
fi
echo "═══════════════════════════════════════════════════════════════════════"
echo "🚀 Starting application..."
echo ""
# `su -c` takes one command string that a second shell parses again, so each
# script argument is shell-quoted with printf %q before being appended.
# Splicing "$*" in directly would let arguments containing spaces or shell
# metacharacters be re-split or re-interpreted by that inner shell.
# NOTE(review): %q emits bash-style quoting; confirm the login shell of
# "user" accepts it if arguments may contain non-printable characters.
app_cmd="python3 wgp.py --listen"
if (( $# > 0 )); then
  app_cmd+=$(printf ' %q' "$@")
fi
# exec replaces this script's process with su; -p preserves the environment
# exported earlier in the script.
exec su -p user -c "$app_cmd"
|