Spaces:
Sleeping
Sleeping
| """ | |
| Tests for V4 Structured Summarization API endpoints. | |
| """ | |
| import contextlib | |
| import json | |
| from unittest.mock import patch | |
| import pytest | |
| from fastapi.testclient import TestClient | |
def test_v4_scrape_and_summarize_stream_success(client: TestClient):
    """Happy path for the V4 stream endpoint when given a URL.

    The scraper and the structured summarizer are both patched, so the test
    only checks how the endpoint frames their output as SSE ``data:`` events:
    one metadata event, the content chunks, and a single terminal event.
    """
    scraped_article = {
        "text": "This is a test article about artificial intelligence and machine learning. "
        * 20,
        "title": "AI Revolution",
        "author": "Tech Writer",
        "date": "2024-11-26",
        "site_name": "Tech News",
        "url": "https://example.com/ai-article",
        "method": "static",
        "scrape_time_ms": 350.5,
    }

    # (content, running token count) for each non-terminal chunk; joined
    # together the contents form one JSON object.
    json_chunks = (
        ('{"title": "', 2),
        ("AI Revolution", 5),
        ('", "main_summary": "', 8),
        ("AI is transforming industries", 15),
        ('", "key_points": ["AI", "ML", "Data"],', 25),
        (' "category": "Tech", "sentiment": "positive", "read_time_min": 5}', 35),
    )

    async def mock_stream(*args, **kwargs):
        """Stand-in for the structured summarizer's streaming generator."""
        for piece, used in json_chunks:
            yield {"content": piece, "done": False, "tokens_used": used}
        # Terminal chunk: empty content, done flag set, latency attached.
        yield {
            "content": "",
            "done": True,
            "tokens_used": 35,
            "latency_ms": 3500.0,
        }

    with (
        patch(
            "app.services.article_scraper.article_scraper_service.scrape_article",
            return_value=scraped_article,
        ),
        patch(
            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream",
            side_effect=mock_stream,
        ),
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream",
            json={
                "url": "https://example.com/ai-article",
                "style": "executive",
                "max_tokens": 1024,
                "include_metadata": True,
            },
        )

    assert response.status_code == 200
    assert response.headers["content-type"] == "text/event-stream; charset=utf-8"

    # Decode every "data: " line; payloads that are not valid JSON are skipped.
    events = []
    for raw_line in response.text.split("\n"):
        if not raw_line.startswith("data: "):
            continue
        try:
            events.append(json.loads(raw_line[len("data: "):]))
        except json.JSONDecodeError:
            continue
    assert len(events) > 0

    # Exactly one metadata event, carrying scrape details and the style.
    metadata_events = [evt for evt in events if evt.get("type") == "metadata"]
    assert len(metadata_events) == 1
    metadata = metadata_events[0]["data"]
    assert metadata["title"] == "AI Revolution"
    assert metadata["style"] == "executive"
    assert "scrape_latency_ms" in metadata

    # Content events are those with a "content" key that are not marked done.
    content_events = [
        evt for evt in events if "content" in evt and not evt.get("done", False)
    ]
    assert len(content_events) >= 5

    # Exactly one terminal event.
    done_events = [evt for evt in events if evt.get("done") is True]
    assert len(done_events) == 1
def test_v4_text_mode_success(client: TestClient):
    """V4 stream endpoint with raw text input, so no scraper is patched."""

    async def mock_stream(*args, **kwargs):
        """Emit one content chunk followed by the terminal chunk."""
        yield {
            "content": '{"title": "Summary", "main_summary": "Test"}',
            "done": False,
            "tokens_used": 10,
        }
        yield {"content": "", "done": True, "tokens_used": 10, "latency_ms": 2000.0}

    request_body = {
        "text": "This is a test article about technology. " * 10,
        "style": "skimmer",
        "include_metadata": True,
    }
    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream",
        side_effect=mock_stream,
    ):
        response = client.post("/api/v4/scrape-and-summarize/stream", json=request_body)

    assert response.status_code == 200

    # Decode every "data: " line; payloads that are not valid JSON are skipped.
    events = []
    for raw_line in response.text.split("\n"):
        if not raw_line.startswith("data: "):
            continue
        try:
            events.append(json.loads(raw_line[len("data: "):]))
        except json.JSONDecodeError:
            continue

    # The single metadata event must describe a text-mode request.
    metadata_events = [evt for evt in events if evt.get("type") == "metadata"]
    assert len(metadata_events) == 1
    metadata = metadata_events[0]["data"]
    assert metadata["input_type"] == "text"
    assert metadata["style"] == "skimmer"
def test_v4_invalid_url(client: TestClient):
    """A malformed URL is rejected with a 422 validation error."""
    request_body = {"url": "not-a-valid-url", "style": "executive"}
    response = client.post("/api/v4/scrape-and-summarize/stream", json=request_body)

    assert response.status_code == 422  # Validation error
def test_v4_localhost_blocked(client: TestClient):
    """SSRF guard: a localhost URL yields 422 and the error mentions localhost."""
    request_body = {"url": "http://localhost:8000/secret", "style": "executive"}
    response = client.post("/api/v4/scrape-and-summarize/stream", json=request_body)

    assert response.status_code == 422
    error_text = response.text.lower()
    assert "localhost" in error_text
def test_v4_private_ip_blocked(client: TestClient):
    """SSRF guard: a private-range IP yields 422 and the error mentions "private"."""
    request_body = {"url": "http://10.0.0.1/secret", "style": "executive"}
    response = client.post("/api/v4/scrape-and-summarize/stream", json=request_body)

    assert response.status_code == 422
    error_text = response.text.lower()
    assert "private" in error_text
def test_v4_insufficient_content(client: TestClient):
    """A scrape that returns too little text is answered with a 422."""
    tiny_article = {
        "text": "Too short",  # Less than 100 chars
        "title": "Test",
        "url": "https://example.com/short",
        "method": "static",
        "scrape_time_ms": 100.0,
    }
    with patch(
        "app.services.article_scraper.article_scraper_service.scrape_article",
        return_value=tiny_article,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream",
            json={"url": "https://example.com/short"},
        )

    assert response.status_code == 422
    error_text = response.text.lower()
    assert "insufficient" in error_text
def test_v4_scrape_failure(client: TestClient):
    """An exception raised by the scraper is reported as HTTP 502."""
    with patch(
        "app.services.article_scraper.article_scraper_service.scrape_article",
        side_effect=Exception("Connection timeout"),
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream",
            json={"url": "https://example.com/timeout"},
        )

    assert response.status_code == 502
def test_v4_style_validation(client: TestClient):
    """The ``style`` field accepts a known value and rejects an unknown one."""
    endpoint = "/api/v4/scrape-and-summarize/stream"

    # A recognised style passes request validation. The summarizer is not
    # mocked here, so the request may still end in a 500 (model not loaded).
    accepted = client.post(
        endpoint,
        json={"text": "Test article content. " * 10, "style": "eli5"},
    )
    assert accepted.status_code in (200, 500)

    # An unrecognised style must be stopped by validation.
    rejected = client.post(
        endpoint,
        json={"text": "Test article content. " * 10, "style": "invalid_style"},
    )
    assert rejected.status_code == 422
def test_v4_missing_url_and_text(client: TestClient):
    """A request with neither ``url`` nor ``text`` is rejected with 422."""
    response = client.post(
        "/api/v4/scrape-and-summarize/stream",
        json={"style": "executive"},  # neither url nor text supplied
    )

    assert response.status_code == 422
    # The error body should name at least one of the missing fields.
    error_text = response.text.lower()
    assert any(field in error_text for field in ("url", "text"))
def test_v4_both_url_and_text(client: TestClient):
    """Supplying ``url`` and ``text`` together is rejected with 422."""
    conflicting_body = {
        "url": "https://example.com/test",
        "text": "Test content",  # both inputs present - invalid
        "style": "executive",
    }
    response = client.post(
        "/api/v4/scrape-and-summarize/stream", json=conflicting_body
    )

    assert response.status_code == 422
def test_v4_max_tokens_validation(client: TestClient):
    """``max_tokens`` is accepted inside its range and rejected outside it."""
    endpoint = "/api/v4/scrape-and-summarize/stream"

    # Inside the valid range (128-2048). The summarizer is not mocked, so a
    # 500 is tolerated as long as validation let the request through.
    in_range = client.post(
        endpoint,
        json={"text": "Test article. " * 10, "max_tokens": 512},
    )
    assert in_range.status_code in (200, 500)

    # First a value below the minimum (128), then one above the maximum (2048).
    for out_of_range in (50, 3000):
        rejected = client.post(
            endpoint,
            json={"text": "Test article. " * 10, "max_tokens": out_of_range},
        )
        assert rejected.status_code == 422
def test_v4_text_length_validation(client: TestClient):
    """Too-short ``text`` is rejected; sufficiently long text passes validation."""
    endpoint = "/api/v4/scrape-and-summarize/stream"

    # Under the minimum length (less than 50 chars).
    too_short = client.post(
        endpoint,
        json={"text": "Short", "style": "executive"},
    )
    assert too_short.status_code == 422

    # Long enough to pass validation; the summarizer is not mocked, so a 500
    # from the unmocked backend is tolerated.
    long_enough = client.post(
        endpoint,
        json={
            "text": "This is a valid length article for testing purposes. " * 2,
            "style": "executive",
        },
    )
    assert long_enough.status_code in (200, 500)
def test_v4_sse_headers(client: TestClient):
    """Check the SSE response headers of the V4 stream endpoint (URL mode).

    This is a plain ``def`` on purpose: the body awaits nothing and drives the
    app through the synchronous ``TestClient``. A coroutine test function is
    only executed when an asyncio pytest plugin is configured to collect it;
    otherwise the header checks below would never run.
    """

    async def mock_stream(*args, **kwargs):
        """Minimal summarizer stream: one content chunk, then the done chunk."""
        yield {"content": "test", "done": False, "tokens_used": 1}
        yield {"content": "", "done": True, "latency_ms": 1000.0}

    with (
        patch(
            "app.services.article_scraper.article_scraper_service.scrape_article"
        ) as mock_scrape,
        patch(
            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream",
            side_effect=mock_stream,
        ),
    ):
        mock_scrape.return_value = {
            "text": "Test article content. " * 20,
            "title": "Test",
            "url": "https://example.com",
            "method": "static",
            "scrape_time_ms": 100.0,
        }
        response = client.post(
            "/api/v4/scrape-and-summarize/stream",
            json={"url": "https://example.com/test"},
        )

    # Check SSE headers
    assert response.headers["content-type"] == "text/event-stream; charset=utf-8"
    assert response.headers["cache-control"] == "no-cache"
    assert response.headers["connection"] == "keep-alive"
    assert "x-request-id" in response.headers
# ============================================================================
# Tests for /api/v4/scrape-and-summarize/stream-json endpoint
# ============================================================================
def test_v4_stream_json_url_mode_success(client: TestClient):
    """stream-json endpoint in URL mode: metadata event first, then raw JSON tokens.

    The scraper and the JSON-streaming summarizer are patched. The test checks
    that the first ``data:`` line is the metadata object and that the
    remaining lines concatenate into the expected JSON document.
    """
    scraped_article = {
        "text": (
            "Artificial intelligence is transforming modern technology. "
            "Machine learning algorithms are becoming more sophisticated. "
            "Deep learning models can now process vast amounts of data efficiently."
        )
        * 10,
        "title": "AI Revolution 2024",
        "author": "Dr. Jane Smith",
        "date": "2024-11-30",
        "site_name": "Tech Insights",
        "url": "https://techinsights.com/ai-2024",
        "method": "static",
        "scrape_time_ms": 425.8,
    }

    # Raw JSON fragments, simulating token-by-token Outlines output.
    token_fragments = (
        '{"title": "',
        "AI Revolution",
        '", "main_summary": "',
        "Artificial intelligence is rapidly evolving",
        '", "key_points": [',
        '"AI is transforming technology"',
        ', "ML algorithms are improving"',
        ', "Deep learning processes data efficiently"',
        '], "category": "',
        "Technology",
        '", "sentiment": "',
        "positive",
        '", "read_time_min": ',
        "3",
        "}",
    )

    async def mock_json_stream(*args, **kwargs):
        """Stand-in for the summarizer's raw-JSON token stream."""
        for fragment in token_fragments:
            yield fragment

    with (
        patch(
            "app.services.article_scraper.article_scraper_service.scrape_article",
            return_value=scraped_article,
        ),
        patch(
            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
            side_effect=mock_json_stream,
        ),
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "url": "https://techinsights.com/ai-2024",
                "style": "executive",
                "max_tokens": 512,
                "include_metadata": True,
            },
        )

    assert response.status_code == 200
    assert response.headers["content-type"] == "text/event-stream; charset=utf-8"

    # Keep the raw payload of every "data: " line (no JSON decoding yet).
    events = [
        raw_line[len("data: "):]
        for raw_line in response.text.split("\n")
        if raw_line.startswith("data: ")
    ]

    # The first event is the metadata object.
    metadata_event = json.loads(events[0])
    assert metadata_event["type"] == "metadata"
    details = metadata_event["data"]
    assert details["input_type"] == "url"
    assert details["url"] == "https://techinsights.com/ai-2024"
    assert details["title"] == "AI Revolution 2024"
    assert details["author"] == "Dr. Jane Smith"
    assert details["style"] == "executive"
    assert "scrape_latency_ms" in details

    # Everything after it is raw tokens that must join into valid JSON.
    complete_json = "".join(events[1:])
    parsed_json = json.loads(complete_json)
    assert parsed_json["title"] == "AI Revolution"
    assert "AI is transforming technology" in parsed_json["key_points"]
    assert parsed_json["category"] == "Technology"
    assert parsed_json["sentiment"] == "positive"
    assert parsed_json["read_time_min"] == 3
def test_v4_stream_json_text_mode_success(client: TestClient):
    """stream-json endpoint with raw text input, so no scraper is patched."""
    test_text = (
        "Climate change poses significant challenges to global ecosystems. "
        "Rising temperatures affect weather patterns worldwide. "
        "Scientists emphasize the need for immediate action."
    )

    token_fragments = (
        '{"title": "Climate Change Impact", ',
        '"main_summary": "Climate change affects global ecosystems", ',
        '"key_points": ["Rising temperatures", "Weather patterns"], ',
        '"category": "Environment", ',
        '"sentiment": "neutral", ',
        '"read_time_min": 1}',
    )

    async def mock_json_stream(*args, **kwargs):
        """Stand-in for the summarizer's raw-JSON token stream."""
        for fragment in token_fragments:
            yield fragment

    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
        side_effect=mock_json_stream,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "text": test_text,
                "style": "skimmer",
                "max_tokens": 256,
                "include_metadata": True,
            },
        )

    assert response.status_code == 200

    # Keep the raw payload of every "data: " line.
    events = [
        raw_line[len("data: "):]
        for raw_line in response.text.split("\n")
        if raw_line.startswith("data: ")
    ]

    # Metadata for text mode: reports the text length and carries no URL field.
    metadata_event = json.loads(events[0])
    assert metadata_event["type"] == "metadata"
    details = metadata_event["data"]
    assert details["input_type"] == "text"
    assert details["text_length"] == len(test_text)
    assert details["style"] == "skimmer"
    assert "url" not in details  # URL mode fields not present

    # The remaining events join into the summary JSON.
    parsed_json = json.loads("".join(events[1:]))
    assert parsed_json["title"] == "Climate Change Impact"
    assert parsed_json["category"] == "Environment"
def test_v4_stream_json_no_metadata(client: TestClient):
    """With ``include_metadata`` false the stream carries only JSON tokens."""
    token_fragments = (
        '{"title": "Test", ',
        '"main_summary": "Summary", ',
        '"key_points": ["A"], ',
        '"category": "Test", ',
        '"sentiment": "neutral", ',
        '"read_time_min": 1}',
    )

    async def mock_json_stream(*args, **kwargs):
        """Stand-in for the summarizer's raw-JSON token stream."""
        for fragment in token_fragments:
            yield fragment

    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
        side_effect=mock_json_stream,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "text": "Test article content for summary generation with enough characters to pass validation."
                * 2,
                "style": "eli5",
                "include_metadata": False,
            },
        )

    assert response.status_code == 200

    # Keep the raw payload of every "data: " line.
    events = [
        raw_line[len("data: "):]
        for raw_line in response.text.split("\n")
        if raw_line.startswith("data: ")
    ]

    # A metadata event would be a complete JSON object with "type": "metadata".
    # If the first payload does not decode on its own, it is a raw token
    # fragment, which is what this test expects.
    if events and events[0]:
        try:
            first_event = json.loads(events[0])
        except json.JSONDecodeError:
            pass
        else:
            assert first_event.get("type") != "metadata", (
                "Metadata should not be included"
            )

    # Every payload together must form the summary JSON.
    parsed_json = json.loads("".join(events))
    assert parsed_json["title"] == "Test"
def test_v4_stream_json_different_styles(client: TestClient):
    """Each supported style value gets a 200 from the stream-json endpoint."""

    def make_mock_stream(style_name: str):
        """Build a token stream whose title is the upper-cased style name."""

        async def mock_json_stream(*args, **kwargs):
            yield f'{{"title": "{style_name.upper()}", '
            yield '"main_summary": "Test", '
            yield '"key_points": ["A"], '
            yield '"category": "Test", '
            yield '"sentiment": "positive", '
            yield '"read_time_min": 1}'

        return mock_json_stream

    for style in ("skimmer", "executive", "eli5"):
        request_body = {
            "text": "Test content for different styles with sufficient character count to pass validation requirements."
            * 2,
            "style": style,
            "include_metadata": False,
        }
        with patch(
            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
            side_effect=make_mock_stream(style),
        ):
            response = client.post(
                "/api/v4/scrape-and-summarize/stream-json", json=request_body
            )

        assert response.status_code == 200, f"Failed for style: {style}"
def test_v4_stream_json_custom_max_tokens(client: TestClient):
    """Check that a custom ``max_tokens`` is forwarded to the summarizer.

    The mocked stream only records the ``max_tokens`` it receives; the check
    is made in the test body afterwards. An assertion placed inside the
    generator runs within the app's streaming machinery: it never executes if
    the generator is not iterated, and a failure there may not reach the test
    if the endpoint handles stream errors.
    """
    seen_max_tokens = []

    async def mock_json_stream(text, style, max_tokens=None):
        """Record the forwarded ``max_tokens`` and stream a fixed JSON summary."""
        seen_max_tokens.append(max_tokens)
        yield '{"title": "Custom Tokens", '
        yield '"main_summary": "Test", '
        yield '"key_points": ["A"], '
        yield '"category": "Test", '
        yield '"sentiment": "neutral", '
        yield '"read_time_min": 1}'

    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
        side_effect=mock_json_stream,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "text": "Test content with custom max tokens that meets minimum character requirements."
                * 3,
                "style": "executive",
                "max_tokens": 1536,
                "include_metadata": False,
            },
        )

    assert response.status_code == 200
    # The stream must have been consumed, and every consumption must have
    # seen the value sent in the request.
    assert seen_max_tokens, "summarizer stream was never consumed"
    assert all(value == 1536 for value in seen_max_tokens)
def test_v4_stream_json_scraping_failure(client: TestClient):
    """A scraper exception is reported as 502 with a scrape-related detail."""
    with patch(
        "app.services.article_scraper.article_scraper_service.scrape_article",
        side_effect=Exception("Network timeout"),
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "url": "https://example.com/unreachable",
                "style": "executive",
            },
        )

    assert response.status_code == 502
    assert "detail" in response.json()
    detail_text = response.json()["detail"].lower()
    assert "scrape" in detail_text
def test_v4_stream_json_content_too_short(client: TestClient):
    """Scraped text that is too short yields 422 with an "insufficient" detail."""
    tiny_article = {
        "text": "Too short",  # Less than 100 characters
        "title": "Short Article",
        "url": "https://example.com/short",
        "method": "static",
        "scrape_time_ms": 200.0,
    }
    with patch(
        "app.services.article_scraper.article_scraper_service.scrape_article",
        return_value=tiny_article,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "url": "https://example.com/short",
                "style": "executive",
            },
        )

    assert response.status_code == 422
    assert "detail" in response.json()
    detail_text = response.json()["detail"].lower()
    assert "insufficient" in detail_text
def test_v4_stream_json_ssrf_protection(client: TestClient):
    """Loopback and private-network URLs are rejected with 422."""
    blocked_targets = (
        "http://localhost/admin",
        "http://127.0.0.1/secrets",
        "http://192.168.1.1/internal",
        "http://10.0.0.1/private",
    )
    for url in blocked_targets:
        request_body = {"url": url, "style": "executive"}
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json", json=request_body
        )

        assert response.status_code == 422, f"SSRF not blocked for: {url}"
        # FastAPI validation errors return a "detail" entry in the body.
        assert "detail" in response.json()
def test_v4_stream_json_validation_errors(client: TestClient):
    """Each malformed request body is rejected by the stream-json endpoint with 422."""
    invalid_bodies = (
        # Missing both url and text
        {"style": "executive"},
        # Both url and text provided
        {
            "url": "https://example.com",
            "text": "Some text",
            "style": "executive",
        },
        # Text too short
        {
            "text": "Short",
            "style": "executive",
        },
        # Invalid style
        {
            "text": "Valid length text for testing validation" * 5,
            "style": "invalid_style",
        },
    )
    for request_body in invalid_bodies:
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json", json=request_body
        )
        assert response.status_code == 422
def test_v4_stream_json_response_headers(client: TestClient):
    """The stream-json endpoint answers with the expected SSE headers."""

    async def mock_json_stream(*args, **kwargs):
        """Two-fragment JSON summary stream."""
        yield '{"title": "Test", "main_summary": "Test", "key_points": [], '
        yield '"category": "Test", "sentiment": "neutral", "read_time_min": 1}'

    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
        side_effect=mock_json_stream,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json={
                "text": "Test content for header validation." * 10,
                "style": "executive",
            },
        )

    # Headers that must match exactly, checked in this order.
    expected_headers = (
        ("content-type", "text/event-stream; charset=utf-8"),
        ("cache-control", "no-cache"),
        ("connection", "keep-alive"),
        ("x-accel-buffering", "no"),
    )
    for header_name, header_value in expected_headers:
        assert response.headers[header_name] == header_value
    # A request id must be present; its value is not constrained here.
    assert "x-request-id" in response.headers
def test_v4_stream_json_request_id_tracking(client: TestClient):
    """A caller-supplied ``X-Request-ID`` is echoed back in the response."""
    custom_request_id = "test-request-12345"

    async def mock_json_stream(*args, **kwargs):
        """Two-fragment JSON summary stream."""
        yield '{"title": "Test", "main_summary": "Test", "key_points": [], '
        yield '"category": "Test", "sentiment": "neutral", "read_time_min": 1}'

    request_body = {
        "text": "Test content for request ID tracking." * 10,
        "style": "executive",
    }
    with patch(
        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
        side_effect=mock_json_stream,
    ):
        response = client.post(
            "/api/v4/scrape-and-summarize/stream-json",
            json=request_body,
            headers={"X-Request-ID": custom_request_id},
        )

    echoed_id = response.headers["x-request-id"]
    assert echoed_id == custom_request_id