ming committed on
Commit
01d5d83
·
1 Parent(s): 7019b66

chore: Add test scripts and update local configuration

Browse files

- Add HF V4 NDJSON endpoint test script
- Add HF old endpoint test for comparison
- Add V3 live test script
- Update documentation
- Update local Claude configuration

.claude/commands/commit-code.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Commit code
3
+
4
+ Review the files that have changed, and create a commit with a commit message summarizing the changes made.
5
+ Always try to give short and concise messages that convey the business logic.
6
+
7
+ Always push the code to GitHub and also Hugging Face.
8
+
9
+ Use the user's hints as the main subject of the commit message: $arguments
.claude/settings.local.json CHANGED
@@ -1,7 +1,30 @@
1
  {
2
  "permissions": {
3
  "allow": [
4
- "WebSearch"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "deny": [],
7
  "ask": []
 
1
  {
2
  "permissions": {
3
  "allow": [
4
+ "WebSearch",
5
+ "Bash(git add:*)",
6
+ "Bash(git commit:*)",
7
+ "Bash(git log:*)",
8
+ "Bash(pytest:*)",
9
+ "Bash(git push:*)",
10
+ "Bash(python3:*)",
11
+ "Bash(tree:*)",
12
+ "Bash(python -m pytest:*)",
13
+ "Bash(lsof:*)",
14
+ "Bash(python test_v3_live.py:*)",
15
+ "Bash(pkill:*)",
16
+ "Bash(pip install:*)",
17
+ "Bash(pip --version:*)",
18
+ "Bash(python:*)",
19
+ "Bash(conda install:*)",
20
+ "Bash(conda env:*)",
21
+ "Bash(conda run:*)",
22
+ "Bash(cat:*)",
23
+ "Bash(curl:*)",
24
+ "Bash(timeout 15 conda run --no-capture-output -n summarizer python -m uvicorn:*)",
25
+ "Bash(/opt/anaconda3/envs/summarizer/bin/python:*)",
26
+ "Bash(ENABLE_V4_WARMUP=true timeout 15 /opt/anaconda3/envs/summarizer/bin/python:*)",
27
+ "Bash(ENABLE_V4_WARMUP=true /opt/anaconda3/envs/summarizer/bin/python:*)"
28
  ],
29
  "deny": [],
30
  "ask": []
Updated V4 PDP.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Product Development Plan: Backend V4 (Structured \+ Streaming)**
2
+
3
+ ## **Objective**
4
+
5
+ Create a new API version (V4) that builds upon the V3 scraping logic.
6
+ Crucial Change: Instead of using outlines (which blocks streaming for JSON), we will use Standard Hugging Face Streaming with a strict System Prompt. This ensures the Android app receives the result token-by-token in real-time via Server-Sent Events (SSE).
7
+
8
+ ## **Constraints & Environment**
9
+
10
+ * **Platform:** Hugging Face Spaces (Docker)
11
+ * **Hardware:** CPU Only (Free Tier: 2 vCPU, 16GB RAM)
12
+ * **Memory Management:**
13
+ * **Warning:** Phi-3 Mini can spike memory. We will use torch\_dtype=torch.float32 on CPU to ensure stability, even if it uses \~8-10GB RAM.
14
+
15
+ ## **Step 1: Update Dependencies**
16
+
17
+ File: requirements.txt
18
+ Action: Ensure these libraries are present.
19
+
20
+ * einops (Required for Phi-3)
21
+ * accelerate
22
+ * transformers\>=4.41.0
23
+ * scipy (Often needed for unquantized models)
24
+ * pytest-asyncio
25
+
26
+ ## **Step 2: Define Output Schemas**
27
+
28
+ File: app/schemas/summary\_v4.py (New File)
29
+ Action: Define the structure we expect from the model (used for documentation and validation).
30
+ from pydantic import BaseModel, Field
31
+ from typing import List
32
+ from enum import Enum
33
+
34
+ class Sentiment(str, Enum):
35
+ POSITIVE \= "positive"
36
+ NEGATIVE \= "negative"
37
+ NEUTRAL \= "neutral"
38
+
39
+ class StructuredSummary(BaseModel):
40
+ title: str \= Field(..., description="A click-worthy, engaging title")
41
+ main\_summary: str \= Field(..., description="The main summary content")
42
+ key\_points: List\[str\] \= Field(..., description="List of key facts")
43
+ category: str \= Field(..., description="Topic category")
44
+ sentiment: Sentiment \= Field(..., description="Overall sentiment")
45
+ read\_time\_min: int \= Field(..., description="Estimated reading time")
46
+
47
+ ## **Step 3: Implement V4 Model Loader (Standard Transformers)**
48
+
49
+ File: app/services/model\_loader\_v4.py (New File)
50
+ Action: Create a service to load the model and tokenizer directly.
51
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
52
+ import torch
53
+ import threading
54
+
55
+ class ModelServiceV4:
56
+ \_model \= None
57
+ \_tokenizer \= None
58
+
59
+ @classmethod
60
+ def get\_model(cls):
61
+ if cls.\_model is None:
62
+ print("Loading V4 Model (Phi-3)...")
63
+ model\_id \= "microsoft/Phi-3-mini-4k-instruct"
64
+ cls.\_tokenizer \= AutoTokenizer.from\_pretrained(model\_id)
65
+ cls.\_model \= AutoModelForCausalLM.from\_pretrained(
66
+ model\_id,
67
+ torch\_dtype=torch.float32, \# CPU friendly
68
+ device\_map="cpu",
69
+ trust\_remote\_code=True
70
+ )
71
+ return cls.\_model, cls.\_tokenizer
72
+
73
+ @classmethod
74
+ def stream\_generation(cls, prompt: str):
75
+ model, tokenizer \= cls.get\_model()
76
+
77
+ inputs \= tokenizer(prompt, return\_tensors="pt", return\_attention\_mask=False)
78
+ streamer \= TextIteratorStreamer(tokenizer, skip\_prompt=True, skip\_special\_tokens=True)
79
+
80
+ generation\_kwargs \= dict(
81
+ inputs,
82
+ streamer=streamer,
83
+ max\_new\_tokens=1024,
84
+ do\_sample=True,
85
+ temperature=0.2, \# Low temp for stable JSON
86
+ )
87
+
88
+ \# Run generation in a separate thread to unblock the stream
89
+ thread \= threading.Thread(target=model.generate, kwargs=generation\_kwargs)
90
+ thread.start()
91
+
92
+ for new\_text in streamer:
93
+ yield new\_text
94
+
95
+ ## **Step 4: Create V4 Router (SSE Endpoint)**
96
+
97
+ File: app/api/v4/endpoints.py (New Path)
98
+ Action: Implement the router using StreamingResponse with text/event-stream.
99
+ from fastapi import APIRouter, HTTPException
100
+ from fastapi.responses import StreamingResponse
101
+ from app.services.model\_loader\_v4 import ModelServiceV4
102
+ \# CORRECTED IMPORT PATH:
103
+ from app.services.article\_scraper import article\_scraper\_service
104
+
105
+ router \= APIRouter()
106
+
107
+ JSON\_SYSTEM\_PROMPT \= """You are a helpful AI assistant.
108
+ You MUST reply with valid JSON only. Do not add markdown blocks.
109
+ The JSON format must exactly match this structure:
110
+ {
111
+ "title": "string",
112
+ "main\_summary": "string",
113
+ "key\_points": \["string", "string"\],
114
+ "category": "string",
115
+ "sentiment": "positive" | "negative" | "neutral",
116
+ "read\_time\_min": int
117
+ }
118
+ """
119
+
120
+ PROMPTS \= {
121
+ "skimmer": "Summarize concisely. Focus on hard facts.",
122
+ "executive": "Summarize for a CEO. Focus on business impact.",
123
+ "eli5": "Explain like I'm 5 years old."
124
+ }
125
+
126
+ @router.post("/scrape-and-summarize/stream")
127
+ async def scrape\_and\_summarize\_stream(url: str, style: str \= "executive"):
128
+ \# 1\. Scrape
129
+ try:
130
+ \# Verify this method name matches your actual service
131
+ scrape\_result \= await article\_scraper\_service.scrape\_url(url)
132
+ text \= scrape\_result.get("content", "")\[:10000\] \# Truncate for memory safety
133
+ except Exception as e:
134
+ raise HTTPException(status\_code=400, detail=f"Scraping failed: {str(e)}")
135
+
136
+ \# 2\. Construct Prompt
137
+ user\_instruction \= PROMPTS.get(style, PROMPTS\["executive"\])
138
+
139
+ \# Phi-3 Chat Template
140
+ full\_prompt \= f"\<|system|\>\\n{JSON\_SYSTEM\_PROMPT}\\n\<|end|\>\\n\<|user|\>\\n{user\_instruction}\\n\\nArticle:\\n{text}\\n\<|end|\>\\n\<|assistant|\>"
141
+
142
+ \# 3\. Stream
143
+ async def event\_generator():
144
+ \# We assume the synchronous generator can be iterated in this async wrapper
145
+ for chunk in ModelServiceV4.stream\_generation(full\_prompt):
146
+ \# SSE Format: data: {content}\\n\\n
147
+ yield chunk
148
+
149
+ return StreamingResponse(event\_generator(), media\_type="text/event-stream")
150
+
151
+ ## **Step 5: Register Router**
152
+
153
+ File: app/main.py
154
+ Action: Update the main app file to include the new router path.
155
+ \# ... existing imports
156
+ from app.api.v4 import endpoints as v4\_endpoints
157
+
158
+ \# ... inside create\_app()
159
+ app.include\_router(v4\_endpoints.router, prefix="/api/v4", tags=\["V4 Structured Summarizer"\])
160
+
161
+ ## **Step 6: Update Environment Config**
162
+
163
+ File: env.hf
164
+ Action: Add the following setting.
165
+
166
+ * ENABLE\_V4\_STRUCTURED=true
167
+
168
+ ## **Step 7: Unit Testing (Success Verification)**
169
+
170
+ File: tests/test\_v4\_stream.py (New File)
171
+ Action: Verify the SSE stream works without loading the heavy model.
172
+ from unittest.mock import patch, MagicMock
173
+ from fastapi.testclient import TestClient
174
+ from app.main import app
175
+
176
+ client \= TestClient(app)
177
+
178
+ @patch("app.api.v4.endpoints.article\_scraper\_service")
179
+ @patch("app.services.model\_loader\_v4.ModelServiceV4.stream\_generation")
180
+ def test\_v4\_sse\_stream(mock\_stream, mock\_scraper):
181
+ \# 1\. Mock Scraper
182
+ mock\_scraper.scrape\_url.return\_value \= {"content": "Mock article content"}
183
+
184
+ \# 2\. Mock Streamer (Yields JSON chunks)
185
+ def fake\_stream(prompt):
186
+ yield '{"title":'
187
+ yield ' "Test Title"}'
188
+ mock\_stream.side\_effect \= fake\_stream
189
+
190
+ \# 3\. Request
191
+ response \= client.post("/api/v4/scrape-and-summarize/stream?url=http://test.com")
192
+
193
+ \# 4\. Verify SSE
194
+ assert response.status\_code \== 200
195
+ assert response.headers\["content-type"\] \== "text/event-stream"
196
+ assert b'{"title":' in response.content
197
+
198
+ **Task:** Run pytest tests/test\_v4\_stream.py and ensure it passes.
test_hf_v4_ndjson.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test the Hugging Face V4 NDJSON endpoint with a real URL.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+
8
+ import httpx
9
+
10
+
11
+ async def test_hf_ndjson_endpoint():
12
+ """Test HF V4 NDJSON endpoint with URL scraping."""
13
+
14
+ # Hugging Face Space URL
15
+ hf_space_url = "https://colin730-summarizerapp.hf.space"
16
+
17
+ url = "https://www.nzherald.co.nz/nz/auckland/mt-wellington-homicide-jury-find-couple-not-guilty-of-murder-after-soldier-stormed-their-house-with-knife/B56S6KBHRVFCZMLDI56AZES6KY/"
18
+
19
+ print("=" * 80)
20
+ print("Hugging Face V4 NDJSON Endpoint Test")
21
+ print("=" * 80)
22
+ print(f"\nHF Space: {hf_space_url}")
23
+ print(f"Endpoint: {hf_space_url}/api/v4/scrape-and-summarize/stream-ndjson")
24
+ print(f"Article URL: {url[:80]}...")
25
+ print(f"Style: executive\n")
26
+
27
+ payload = {
28
+ "url": url,
29
+ "style": "executive",
30
+ "max_tokens": 512,
31
+ "include_metadata": True,
32
+ "use_cache": True,
33
+ }
34
+
35
+ # Longer timeout for HF (first request can be slow if cold start)
36
+ async with httpx.AsyncClient(timeout=600.0) as client:
37
+ try:
38
+ print("🔄 Sending request to Hugging Face...")
39
+ print("⏱️ Note: First request may take 30-60s if instance is cold\n")
40
+
41
+ # Make streaming request
42
+ async with client.stream(
43
+ "POST",
44
+ f"{hf_space_url}/api/v4/scrape-and-summarize/stream-ndjson",
45
+ json=payload,
46
+ ) as response:
47
+ print(f"Status: {response.status_code}")
48
+
49
+ if response.status_code != 200:
50
+ error_text = await response.aread()
51
+ error_str = error_text.decode()
52
+ print(f"\n❌ Error Response:")
53
+ print(error_str)
54
+
55
+ # Check if it's a 404 (endpoint not found)
56
+ if response.status_code == 404:
57
+ print("\n💡 The endpoint might not be deployed yet.")
58
+ print(" The HF Space may still be building (~5-10 minutes).")
59
+ print(f" Check status at: https://huggingface.co/spaces/colin730/SummarizerApp")
60
+ return
61
+
62
+ print("\n" + "=" * 80)
63
+ print("STREAMING EVENTS")
64
+ print("=" * 80)
65
+
66
+ event_count = 0
67
+ final_state = None
68
+ total_tokens = 0
69
+ metadata = None
70
+
71
+ # Parse SSE stream
72
+ async for line in response.aiter_lines():
73
+ if line.startswith("data: "):
74
+ try:
75
+ event = json.loads(line[6:])
76
+
77
+ # Handle metadata event
78
+ if event.get("type") == "metadata":
79
+ metadata = event["data"]
80
+ print("\n--- Metadata Event ---")
81
+ print(json.dumps(metadata, indent=2))
82
+ print("\n" + "-" * 80)
83
+ continue
84
+
85
+ event_count += 1
86
+
87
+ # Check for error
88
+ if "error" in event:
89
+ print(f"\n❌ ERROR: {event['error']}")
90
+
91
+ if "model not available" in event['error'].lower():
92
+ print("\n💡 This means:")
93
+ print(" - The endpoint is working ✅")
94
+ print(" - Scraping is working ✅")
95
+ print(" - But the model isn't loaded on HF")
96
+ print(" - This is expected if PyTorch/transformers aren't installed")
97
+ return
98
+
99
+ # Extract event data
100
+ delta = event.get("delta")
101
+ state = event.get("state")
102
+ done = event.get("done", False)
103
+ tokens_used = event.get("tokens_used", 0)
104
+ latency_ms = event.get("latency_ms")
105
+
106
+ total_tokens = tokens_used
107
+
108
+ # Print event details (compact format)
109
+ if delta and "op" in delta:
110
+ op = delta.get("op")
111
+ if op == "set":
112
+ field = delta.get("field")
113
+ value = delta.get("value")
114
+ value_str = str(value)[:60] + "..." if len(str(value)) > 60 else str(value)
115
+ print(f"Event #{event_count}: Set {field} = {value_str}")
116
+ elif op == "append":
117
+ field = delta.get("field")
118
+ value = delta.get("value")
119
+ value_str = str(value)[:60] + "..." if len(str(value)) > 60 else str(value)
120
+ print(f"Event #{event_count}: Append to {field}: {value_str}")
121
+ elif op == "done":
122
+ print(f"Event #{event_count}: ✅ Done signal received")
123
+ elif delta is None and done:
124
+ print(f"Event #{event_count}: 🏁 Final event (latency: {latency_ms}ms)")
125
+
126
+ # Store final state
127
+ if state:
128
+ final_state = state
129
+
130
+ except json.JSONDecodeError as e:
131
+ print(f"Failed to parse JSON: {e}")
132
+ print(f"Raw line: {line}")
133
+
134
+ # Print final results
135
+ print("\n" + "=" * 80)
136
+ print("FINAL RESULTS")
137
+ print("=" * 80)
138
+
139
+ if metadata:
140
+ print(f"\n--- Scraping Info ---")
141
+ print(f"Input type: {metadata.get('input_type')}")
142
+ print(f"Article title: {metadata.get('title')}")
143
+ print(f"Site: {metadata.get('site_name')}")
144
+ print(f"Scrape method: {metadata.get('scrape_method')}")
145
+ print(f"Scrape latency: {metadata.get('scrape_latency_ms', 0):.2f}ms")
146
+ print(f"Text extracted: {metadata.get('extracted_text_length', 0)} chars")
147
+
148
+ print(f"\nTotal events: {event_count}")
149
+ print(f"Total tokens: {total_tokens}")
150
+
151
+ if final_state:
152
+ print("\n--- Final Structured State ---")
153
+ print(json.dumps(final_state, indent=2, ensure_ascii=False))
154
+
155
+ # Validate structure
156
+ print("\n--- Validation ---")
157
+ required_fields = ["title", "main_summary", "key_points", "category", "sentiment", "read_time_min"]
158
+
159
+ all_valid = True
160
+ for field in required_fields:
161
+ value = final_state.get(field)
162
+ if field == "key_points":
163
+ if isinstance(value, list) and len(value) > 0:
164
+ print(f"✅ {field}: {len(value)} items")
165
+ else:
166
+ print(f"⚠️ {field}: empty or not a list")
167
+ all_valid = False
168
+ else:
169
+ if value is not None:
170
+ value_str = str(value)[:50] + "..." if len(str(value)) > 50 else str(value)
171
+ print(f"✅ {field}: {value_str}")
172
+ else:
173
+ print(f"⚠️ {field}: None")
174
+ all_valid = False
175
+
176
+ # Check sentiment is valid
177
+ sentiment = final_state.get("sentiment")
178
+ valid_sentiments = ["positive", "negative", "neutral"]
179
+ if sentiment in valid_sentiments:
180
+ print(f"✅ sentiment value is valid: {sentiment}")
181
+ else:
182
+ print(f"⚠️ sentiment value is invalid: {sentiment}")
183
+ all_valid = False
184
+
185
+ print("\n" + "=" * 80)
186
+ if all_valid:
187
+ print("✅ ALL VALIDATIONS PASSED - HF ENDPOINT WORKING!")
188
+ else:
189
+ print("⚠️ Some validations failed")
190
+ print("=" * 80)
191
+ else:
192
+ print("\n⚠️ No final state received")
193
+
194
+ except httpx.ConnectError:
195
+ print(f"\n❌ Could not connect to {hf_space_url}")
196
+ print("\n💡 Possible reasons:")
197
+ print(" 1. HF Space is still building/deploying")
198
+ print(" 2. HF Space is sleeping (free tier)")
199
+ print(" 3. Network connectivity issue")
200
+ print(f"\n🔗 Check space status: https://huggingface.co/spaces/colin730/SummarizerApp")
201
+ except httpx.ReadTimeout:
202
+ print("\n⏱️ Request timed out")
203
+ print(" This might mean the HF Space is cold-starting")
204
+ print(" Try again in a few moments")
205
+ except Exception as e:
206
+ print(f"\n❌ Error: {e}")
207
+ import traceback
208
+ traceback.print_exc()
209
+
210
+
211
+ if __name__ == "__main__":
212
+ print("\n🚀 Testing Hugging Face V4 NDJSON Endpoint\n")
213
+ asyncio.run(test_hf_ndjson_endpoint())
214
+
test_hf_v4_old.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test the old HF V4 endpoint to see what the model generates.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+
8
+ import httpx
9
+
10
+
11
+ async def test_hf_old_endpoint():
12
+ """Test HF V4 old (non-NDJSON) endpoint."""
13
+
14
+ hf_space_url = "https://colin730-summarizerapp.hf.space"
15
+
16
+ url = "https://www.nzherald.co.nz/nz/auckland/mt-wellington-homicide-jury-find-couple-not-guilty-of-murder-after-soldier-stormed-their-house-with-knife/B56S6KBHRVFCZMLDI56AZES6KY/"
17
+
18
+ print("=" * 80)
19
+ print("Hugging Face V4 OLD Endpoint Test (for comparison)")
20
+ print("=" * 80)
21
+ print(f"\nEndpoint: {hf_space_url}/api/v4/scrape-and-summarize/stream")
22
+ print(f"Article URL: {url[:80]}...")
23
+ print(f"Style: executive\n")
24
+
25
+ payload = {
26
+ "url": url,
27
+ "style": "executive",
28
+ "max_tokens": 512,
29
+ "include_metadata": True,
30
+ "use_cache": True,
31
+ }
32
+
33
+ async with httpx.AsyncClient(timeout=600.0) as client:
34
+ try:
35
+ print("🔄 Sending request to old V4 endpoint...\n")
36
+
37
+ async with client.stream(
38
+ "POST",
39
+ f"{hf_space_url}/api/v4/scrape-and-summarize/stream",
40
+ json=payload,
41
+ ) as response:
42
+ print(f"Status: {response.status_code}\n")
43
+
44
+ if response.status_code != 200:
45
+ error_text = await response.aread()
46
+ print(f"❌ Error: {error_text.decode()}")
47
+ return
48
+
49
+ print("=" * 80)
50
+ print("MODEL OUTPUT (Raw)")
51
+ print("=" * 80)
52
+ print()
53
+
54
+ full_content = []
55
+ token_count = 0
56
+
57
+ async for line in response.aiter_lines():
58
+ if line.startswith("data: "):
59
+ try:
60
+ event = json.loads(line[6:])
61
+
62
+ # Metadata
63
+ if event.get("type") == "metadata":
64
+ print("--- Metadata ---")
65
+ print(json.dumps(event["data"], indent=2))
66
+ print("\n" + "-" * 80 + "\n")
67
+ continue
68
+
69
+ # Error
70
+ if "error" in event:
71
+ print(f"\n❌ ERROR: {event['error']}")
72
+ return
73
+
74
+ # Content
75
+ if "content" in event and not event.get("done"):
76
+ content = event["content"]
77
+ full_content.append(content)
78
+ print(content, end="", flush=True)
79
+ token_count = event.get("tokens_used", token_count)
80
+
81
+ # Done
82
+ elif event.get("done"):
83
+ latency = event.get("latency_ms", 0)
84
+ token_count = event.get("tokens_used", token_count)
85
+ print(f"\n\n{'=' * 80}")
86
+ print(f"✅ Done | Tokens: {token_count} | Latency: {latency:.2f}ms")
87
+ print("=" * 80)
88
+
89
+ except json.JSONDecodeError as e:
90
+ print(f"\nJSON Error: {e}")
91
+ print(f"Raw: {line}")
92
+
93
+ # Try to parse as JSON
94
+ full_text = "".join(full_content)
95
+ if full_text:
96
+ print("\n--- Attempting JSON Parse ---")
97
+ try:
98
+ parsed = json.loads(full_text)
99
+ print("✅ Valid JSON!")
100
+ print(json.dumps(parsed, indent=2))
101
+ except json.JSONDecodeError:
102
+ print("❌ Not valid JSON")
103
+ print("This is the raw model output (not JSON-formatted)")
104
+
105
+ except Exception as e:
106
+ print(f"\n❌ Error: {e}")
107
+ import traceback
108
+ traceback.print_exc()
109
+
110
+
111
+ if __name__ == "__main__":
112
+ print("\n🧪 Testing Old V4 Endpoint\n")
113
+ asyncio.run(test_hf_old_endpoint())
114
+
test_v3_live.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Live test of V3 API endpoint with real URL.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+
8
+ import httpx
9
+
10
+
11
+ async def test_v3_streaming():
12
+ """Test V3 scraping and summarization with streaming."""
13
+ url = "https://www.nzherald.co.nz/nz/prominent-executive-who-admitted-receiving-commercial-sex-services-from-girl-bought-her-uber-eats-200-gift-card-1000-cash/RWWAZCPM4BDHNPKLGGAPUKVQ7M/"
14
+
15
+ async with httpx.AsyncClient(timeout=300.0) as client:
16
+ # Make streaming request
17
+ async with client.stream(
18
+ "POST",
19
+ "http://localhost:7860/api/v3/scrape-and-summarize/stream",
20
+ json={
21
+ "url": url,
22
+ "max_tokens": 256,
23
+ "include_metadata": True,
24
+ },
25
+ ) as response:
26
+ print(f"Status: {response.status_code}")
27
+ print(f"Headers: {dict(response.headers)}\n")
28
+
29
+ if response.status_code != 200:
30
+ error_text = await response.aread()
31
+ print(f"Error: {error_text.decode()}")
32
+ return
33
+
34
+ # Parse SSE stream
35
+ full_summary = []
36
+ async for line in response.aiter_lines():
37
+ if line.startswith("data: "):
38
+ try:
39
+ event = json.loads(line[6:])
40
+
41
+ # Print metadata event
42
+ if event.get("type") == "metadata":
43
+ print("=== ARTICLE METADATA ===")
44
+ metadata = event["data"]
45
+ print(f"Title: {metadata.get('title', 'N/A')}")
46
+ print(f"Author: {metadata.get('author', 'N/A')}")
47
+ print(f"Site: {metadata.get('site_name', 'N/A')}")
48
+ print(f"Scrape latency: {metadata.get('scrape_latency_ms', 0):.2f}ms")
49
+ print(f"Extracted text length: {metadata.get('extracted_text_length', 0)} chars")
50
+ print()
51
+
52
+ # Collect content chunks
53
+ elif "content" in event:
54
+ if not event.get("done", False):
55
+ content = event["content"]
56
+ full_summary.append(content)
57
+ print(content, end="", flush=True)
58
+ else:
59
+ # Done event
60
+ print(f"\n\n=== SUMMARY STATS ===")
61
+ print(f"Tokens used: {event.get('tokens_used', 0)}")
62
+ print(f"Latency: {event.get('latency_ms', 0):.2f}ms")
63
+
64
+ # Error event
65
+ elif "error" in event:
66
+ print(f"\n\nERROR: {event['error']}")
67
+
68
+ except json.JSONDecodeError as e:
69
+ print(f"Failed to parse JSON: {e}")
70
+ print(f"Raw line: {line}")
71
+
72
+ print("\n\n=== FULL SUMMARY ===")
73
+ print("".join(full_summary))
74
+
75
+
76
+ if __name__ == "__main__":
77
+ print("Testing V3 API with NZ Herald article...\n")
78
+ asyncio.run(test_v3_streaming())