ming committed
Commit · db3b809
1 Parent(s): 45b6536
chore: code formatting improvements and update gitignore
- Format structured_summarizer.py with proper line breaks
- Add test output files to .gitignore
- Update V4_TESTING_LEARNINGS.md
- .gitignore +4 -0
- V4_TESTING_LEARNINGS.md +3 -0
- app/services/structured_summarizer.py +9 -5
.gitignore
CHANGED
@@ -59,3 +59,7 @@ Thumbs.db
 
 # Docker
 .dockerignore
+
+# Test output files
+*_test_output.txt
+*_scraping_test.txt
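
Both new entries are basename globs that require a stem before the underscore. A minimal sketch of what they do and do not catch, using hypothetical file names (Python's fnmatch behaves like git's glob for slash-free names):

from fnmatch import fnmatch

# The two patterns added to .gitignore above.
patterns = ["*_test_output.txt", "*_scraping_test.txt"]

# Hypothetical file names, not taken from the repo.
candidates = [
    "ndjson_test_output.txt",  # stem before "_test_output" -> ignored
    "hn_scraping_test.txt",    # stem before "_scraping_test" -> ignored
    "test_output.txt",         # no stem, suffix alone doesn't match -> tracked
]

for name in candidates:
    ignored = any(fnmatch(name, p) for p in patterns)
    print(f"{name}: {'ignored' if ignored else 'tracked'}")
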
V4_TESTING_LEARNINGS.md
CHANGED
@@ -252,3 +252,6 @@ ENABLE_V4_WARMUP=true
 **Best Overall**: 3B + NDJSON (once streaming issues resolved)
 **Most Reliable**: 3B + Outlines JSON (slower but works)
 
+
+
+
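
The "streaming issues" noted against the NDJSON variant are the classic failure mode of newline-delimited JSON over a chunked transport: a record can be split across chunks, and the final line may arrive without a terminating newline. A minimal, library-free sketch of defensive buffering, not code from this repo (function and variable names are illustrative):

import json

def parse_ndjson_stream(chunks):
    """Yield one dict per complete JSON line; tolerate a truncated tail."""
    buffer = ""
    for chunk in chunks:
        buffer += chunk
        # Emit every newline-terminated record as soon as it is complete.
        while "\n" in buffer:
            line, buffer = buffer.split("\n", 1)
            if line.strip():
                yield json.loads(line)
    # Whatever remains has no terminating newline; only emit it if it
    # parses cleanly, since it may be a cut-off record.
    if buffer.strip():
        try:
            yield json.loads(buffer)
        except json.JSONDecodeError:
            pass  # incomplete final record: drop rather than crash

# Example: two complete records split awkwardly across chunks.
chunks = ['{"title": "A"}\n{"sco', 're": 1}\n']
print(list(parse_ndjson_stream(chunks)))  # [{'title': 'A'}, {'score': 1}]
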
app/services/structured_summarizer.py
CHANGED
@@ -90,14 +90,20 @@ class StructuredSummarizer:
 
         # Decide device / quantization strategy
         use_cuda = torch.cuda.is_available()
-        use_mps = torch.backends.mps.is_available() if hasattr(torch.backends, "mps") else False
+        use_mps = (
+            torch.backends.mps.is_available()
+            if hasattr(torch.backends, "mps")
+            else False
+        )
         use_gpu = use_cuda or use_mps
         quantization_desc = "None"
 
         if use_cuda:
             logger.info("CUDA is available. Using NVIDIA GPU for V4 model.")
         elif use_mps:
-            logger.info("MPS (Metal Performance Shaders) is available. Using Apple Silicon GPU for V4 model.")
+            logger.info(
+                "MPS (Metal Performance Shaders) is available. Using Apple Silicon GPU for V4 model."
+            )
         else:
             logger.info("No GPU available. V4 model will run on CPU.")
 
@@ -170,9 +176,7 @@ class StructuredSummarizer:
 
         if use_mps:
             # MPS fallback: Load without device_map, manually move to MPS
-            logger.info(
-                f"Loading V4 model for MPS with dtype={base_dtype}"
-            )
+            logger.info(f"Loading V4 model for MPS with dtype={base_dtype}")
             self.model = AutoModelForCausalLM.from_pretrained(
                 settings.v4_model_id,
                 torch_dtype=base_dtype,
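
Taken together, the two hunks implement a detect-then-fallback pattern: probe for CUDA first, then MPS (guarded with hasattr for older torch builds that predate the backend), and on MPS load the model without device_map and move it to the device manually. A standalone sketch of that pattern, not the repo's code (the model id and dtype are placeholder assumptions standing in for settings.v4_model_id and base_dtype):

import torch
from transformers import AutoModelForCausalLM

use_cuda = torch.cuda.is_available()
# hasattr guard: torch.backends.mps does not exist on older torch builds.
use_mps = (
    torch.backends.mps.is_available()
    if hasattr(torch.backends, "mps")
    else False
)

model = AutoModelForCausalLM.from_pretrained(
    "placeholder/model-id",     # assumption: stands in for settings.v4_model_id
    torch_dtype=torch.float16,  # assumption: stands in for base_dtype
)
if use_mps:
    # MPS path: skip device_map and move the loaded model to Metal manually,
    # mirroring the fallback in the diff above.
    model = model.to("mps")
elif use_cuda:
    model = model.to("cuda")
# Otherwise the model stays on CPU.
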