debug: Add comprehensive logging to diagnose 4-token issue
PROBLEM:
Model generates only 4 tokens and produces no valid NDJSON patches.
Need to see what's actually being generated before filtering.
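For context: valid output is NDJSON, one standalone JSON object per line, each
carrying an "op" field. An illustrative example (the field names here are made
up for illustration, not the repo's actual schema):

{"op": "replace", "start": 10, "end": 16, "text": "colour"}
{"op": "insert", "pos": 42, "text": ", however,"}

Anything else on a line is meant to be discarded by the heuristic filter shown
in the diff below.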
DIAGNOSTIC LOGGING ADDED:
1. Prompt Logging:
- Full prompt length
- First 500 chars (shows chat template opening)
- Last 200 chars (shows generation prompt start)
- Verifies apply_chat_template() output is correct
2. Generation Config Logging:
- max_new_tokens value
- EOS token ID
- Pad token ID
- Confirms generation settings
3. Raw Token Logging:
- Every token chunk as it arrives (debug level)
- Shows exact model output before any processing
4. Raw Line Logging:
- Every complete line before the heuristic filter
- Shows what gets filtered and why
5. Buffer State Logging:
- Unparsed buffer contents after generation
- Detects partial/incomplete lines
This will reveal:
- Is the chat template correct?
- Is the model hitting EOS immediately?
- Is the model generating prose instead of JSON?
- Are we filtering out valid output by mistake?
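To answer the first two questions outside the server, here is a minimal
standalone sketch (assumptions: a Hugging Face causal LM; MODEL_NAME and the
test message are hypothetical placeholders, not values from this repo):

# Sketch only: MODEL_NAME and the test message are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "your-org/your-model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

messages = [{"role": "user", "content": "Return one NDJSON patch object."}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt[-200:])  # should end with the assistant/generation marker

inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=16, do_sample=False)
new_ids = output[0][inputs["input_ids"].shape[1]:]
print(new_ids.tolist())           # does eos_token_id appear within a few ids?
print(tokenizer.decode(new_ids))  # prose vs. JSON at a glance

If EOS shows up within the first handful of ids here as well, the problem is
the prompt/template rather than the streaming code.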
@@ -444,6 +444,14 @@ Rules:
         # Build prompt
         full_prompt = self._build_prompt(text, style)
 
+        # DEBUG: Log the actual prompt being sent to model
+        logger.info("=" * 80)
+        logger.info("DEBUG: Full prompt being sent to model:")
+        logger.info(f"Prompt length: {len(full_prompt)} chars")
+        logger.info(f"First 500 chars:\n{full_prompt[:500]}")
+        logger.info(f"Last 200 chars:\n{full_prompt[-200:]}")
+        logger.info("=" * 80)
+
         # Tokenize
         inputs = self.tokenizer(full_prompt, return_tensors="pt")
         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
@@ -467,6 +475,13 @@ Rules:
             "eos_token_id": self.tokenizer.eos_token_id,
         }
 
+        # DEBUG: Log generation config
+        logger.info("Generation config:")
+        logger.info(f"  max_new_tokens: {max_new_tokens}")
+        logger.info("  do_sample: False (deterministic)")
+        logger.info(f"  eos_token_id: {self.tokenizer.eos_token_id}")
+        logger.info(f"  pad_token_id: {gen_kwargs['pad_token_id']}")
+
         # Start generation in background thread
         generation_thread = threading.Thread(
             target=self.model.generate, kwargs=gen_kwargs, daemon=True
@@ -485,6 +500,9 @@ Rules:
             token_count += 1
             buffer += text_chunk
 
+            # DEBUG: Log every raw token chunk
+            logger.debug(f"Token #{token_count}: {repr(text_chunk)}")
+
             # Process complete lines
             while "\n" in buffer:
                 line, buffer = buffer.split("\n", 1)
@@ -493,6 +511,9 @@ Rules:
                 if not line:
                     continue
 
+                # DEBUG: Log every line BEFORE filtering
+                logger.info(f"Raw line (at token #{token_count}): {line[:100]}...")
+
                 # Heuristic: skip anything that clearly isn't a JSON patch object
                 # This filters out lines like "#include <bits/stdc++.h>" or random prose.
                 if not line.startswith("{") or "op" not in line:
@@ -546,6 +567,12 @@ Rules:
         # Wait for generation to complete
         generation_thread.join()
 
+        # DEBUG: Log what's left in the buffer (partial line)
+        if buffer.strip():
+            logger.warning(f"Unparsed buffer remaining: {repr(buffer[:200])}")
+        else:
+            logger.info("Buffer was fully consumed (no partial lines)")
+
         logger.info(
             f"Model generation completed: {token_count} tokens, "
             f"done_received={done_received}"
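FOLLOW-UP (not part of this commit): if the raw-line logs show valid patches
being dropped, the string heuristic could be replaced with an actual parse
attempt. A sketch:

import json

def is_patch_line(line: str) -> bool:
    # Accept only lines that parse as a JSON object with an "op" key.
    # Unlike startswith("{") plus a substring check for "op", this tolerates
    # leading whitespace and rejects prose that merely contains "op".
    stripped = line.strip()
    if not stripped.startswith("{"):
        return False
    try:
        obj = json.loads(stripped)
    except json.JSONDecodeError:
        return False
    return isinstance(obj, dict) and "op" in obj

This is cheaper than it looks: json.loads on a short line is negligible next
to model generation.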