Update form/data/schema.py
Browse files- form/data/schema.py +560 -63
form/data/schema.py
CHANGED
|
@@ -1,75 +1,572 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
}
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
"
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
}
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
"
|
|
|
|
|
|
|
|
|
|
| 55 |
}
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
"
|
| 61 |
}
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import hashlib
|
| 3 |
+
from datetime import datetime, timezone
|
| 4 |
+
from typing import Dict, Any, List, Optional, Union
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from pydantic import BaseModel, Field, validator
|
| 7 |
+
import pyld
|
| 8 |
+
from pyld import jsonld
|
| 9 |
+
|
| 10 |
+
class UnknownAISystem(BaseModel):
    """Schema for unknown AI systems described by users."""

    # Free-text description supplied by the user; must be at least one character.
    description: str = Field(..., min_length=1, description="User description of unknown AI system")
|
| 13 |
+
|
| 14 |
+
class RawAIFlawReport(BaseModel):
    """Raw form input, mirroring the complete submission form structure.

    Every field can be populated either through its form label (the
    ``alias``) or through the Python attribute name. Keys that are not
    declared here are accepted as well (``extra = "allow"``).

    Only ``disclosure_intent`` is required; all other fields fall back to
    ``None`` or an empty list.
    """

    class Config:
        # NOTE(review): ``populate_by_name`` is the pydantic v2 spelling,
        # while ``class Config`` and ``@validator`` are v1-style APIs that v2
        # keeps for backward compatibility. Consider ``model_config`` and
        # ``field_validator`` when the imports are next touched.
        extra = "allow"
        populate_by_name = True

    # --- Identification and timing ---
    report_id: Optional[str] = Field(None, alias="Report ID")
    reporter_id: Optional[str] = Field(None, alias="Reporter ID")
    session_id: Optional[str] = Field(None, alias="Session ID")
    systems: Optional[List[str]] = Field(default_factory=list, alias="Systems")
    flaw_timestamp_start: Optional[str] = Field(None, alias="Flaw Timestamp Start")
    submission_timestamp: Optional[str] = Field(None, alias="Submission Timestamp")

    # --- Report classification ---
    report_types: List[str] = Field(default_factory=list, alias="Report Types")

    # --- Descriptions ---
    flaw_description: Optional[str] = Field(None, alias="Flaw Description")
    incident_description: Optional[str] = Field(None, alias="Incident Description")
    incident_description_detailed: Optional[str] = Field(None, alias="Incident Description - Detailed")
    flaw_description_detailed: Optional[str] = Field(None, alias="Flaw Description - Detailed")

    # --- Assessment ---
    policy_violation: Optional[str] = Field(None, alias="Policy Violation")
    potential_policy_violation: Optional[str] = Field(None, alias="Potential Policy Violation")
    severity: Optional[str] = Field(None, alias="Severity")
    prevalence: Optional[str] = Field(None, alias="Prevalence")

    # --- Impact ---
    impacts: Optional[List[str]] = Field(default_factory=list, alias="Impacts")
    impacts_other: Optional[str] = Field(None, alias="Impacts_Other")
    specific_harm_types: Optional[List[str]] = Field(default_factory=list, alias="Specific Harm Types")
    impacted_stakeholders: Optional[List[str]] = Field(default_factory=list, alias="Impacted Stakeholder(s)")
    csam_related: Optional[str] = Field(None, alias="CSAM Related")

    risk_sources: Optional[Dict[str, Any]] = Field(None, alias="Risk Source(s)")

    context_info: Optional[str] = Field(None, alias="Context Info")
    proof_of_concept: Optional[str] = Field(None, alias="Proof-of-Concept Exploit")

    # --- Real-world incident details ---
    submitter_relationship: Optional[str] = Field(None, alias="Submitter Relationship")
    submitter_relationship_other: Optional[str] = Field(None, alias="Submitter_Relationship_Other")
    incident_locations: Optional[str] = Field(None, alias="Incident Location(s)")
    harm_narrative: Optional[str] = Field(None, alias="Harm Narrative")

    # --- Security / malign-actor details ---
    attacker_resources: Optional[List[str]] = Field(default_factory=list, alias="Attacker Resources")
    attacker_objectives: Optional[List[str]] = Field(default_factory=list, alias="Attacker Objectives")
    objective_context: Optional[str] = Field(None, alias="Objective Context")
    detection_methods: Optional[List[str]] = Field(default_factory=list, alias="Detection")

    statistical_argument: Optional[str] = Field(None, alias="Statistical Argument with Examples")

    # --- Disclosure ---
    disclosure_intent: str = Field(..., alias="Disclosure Intent")
    disclosure_timeline: Optional[str] = Field(None, alias="Disclosure Timeline")
    disclosure_channels: Optional[List[str]] = Field(default_factory=list, alias="Disclosure Channels")
    disclosure_channels_other: Optional[str] = Field(None, alias="Disclosure_Channels_Other")
    embargo_request: Optional[str] = Field(None, alias="Embargo Request")

    @validator('disclosure_intent')
    def validate_disclosure_intent(cls, v):
        """Reject a disclosure intent that is not one of the fixed form options.

        Raises:
            ValueError: if ``v`` is not an allowed option.
        """
        valid_intents = {'Yes', 'No', 'Undecided', 'Already Public Knowledge'}
        if v not in valid_intents:
            # sorted() keeps the message stable; a raw set prints in
            # hash-randomised order.
            raise ValueError(f'Disclosure intent must be one of: {sorted(valid_intents)}')
        return v

    @validator('severity')
    def validate_severity(cls, v):
        """Accept ``None`` or one of the fixed severity labels.

        Raises:
            ValueError: if ``v`` is set but not an allowed label.
        """
        if v is None:
            return v
        valid_severities = {
            'Negligible', 'Low', 'Medium', 'High', 'Critical', 'Significant'
        }
        if v not in valid_severities:
            raise ValueError(f'Severity must be one of: {sorted(valid_severities)}')
        return v
|
| 86 |
+
|
| 87 |
+
class AISystem(BaseModel):
    """Enriched AI System information."""

    id: str = Field(..., description="System identifier/URL")
    name: str = Field(..., description="System name")
    version: str = Field(..., description="System version")
    slug: str = Field(..., description="Internal slug for lookups")
    display_name: str = Field(..., description="Human-friendly display name")
    # process_raw_report() also emits "partially_known" for systems that were
    # named on the form but not found in the knowledge base.
    system_type: str = Field(default="known", description="'known', 'partially_known' or 'unknown'")
    description: Optional[str] = Field(None, description="For unknown systems")
    publisher_info: Optional[Dict[str, Any]] = Field(None, description="Publisher/organization data")
|
| 98 |
+
|
| 99 |
+
class ProcessedAIFlawReport(BaseModel):
    """Fully processed flaw report with enriched data.

    Unlike the raw form model, the core assessment fields here are required
    strings; the producer is expected to substitute placeholder text when the
    form left them empty.
    """

    report_id: str = Field(..., description="Generated report ID")
    # Timezone-aware creation time, captured when the model is instantiated.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    ai_systems: List[AISystem] = Field(..., description="Fully enriched system data")

    # Core Flaw Data
    reporter_id: Optional[str] = None
    session_id: Optional[str] = None
    flaw_timestamp_start: Optional[str] = None
    flaw_description: str
    policy_violation: str

    # Assessment Data
    severity: str
    prevalence: str
    impacts: List[str] = Field(default_factory=list)
    specific_harm_types: List[str] = Field(default_factory=list)
    impacted_stakeholders: List[str] = Field(default_factory=list)

    # Report Classification
    report_types: List[str] = Field(default_factory=list)

    # Conditional Sections (present only for the matching report types)
    incident_data: Optional[Dict[str, Any]] = None
    security_data: Optional[Dict[str, Any]] = None
    vulnerability_data: Optional[Dict[str, Any]] = None
    hazard_data: Optional[Dict[str, Any]] = None

    # Disclosure Information
    disclosure_intent: str
    disclosure_timeline: Optional[str] = None
    disclosure_channels: List[str] = Field(default_factory=list)

    # Original form payload; excluded from repr() to keep it readable.
    raw_data: Optional[Dict[str, Any]] = Field(default=None, repr=False)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def clean_internal_fields(data: Any) -> Any:
    """Return a copy of ``data`` without underscore-prefixed dict keys.

    Dicts and lists are walked recursively; any other value is returned
    unchanged. Used to strip internal bookkeeping keys before JSON-LD output.

    Args:
        data: A dict, a list, or a scalar value.

    Returns:
        A new dict/list with every string key starting with ``"_"`` removed
        at every nesting level, or ``data`` itself for other types.
    """
    if isinstance(data, dict):
        return {
            key: clean_internal_fields(value)
            for key, value in data.items()
            # Non-string keys cannot be "internal"; keep them rather than
            # crash on ``.startswith``.
            if not (isinstance(key, str) and key.startswith("_"))
        }
    if isinstance(data, list):
        return [clean_internal_fields(item) for item in data]
    return data
|
| 145 |
+
|
| 146 |
+
class AIFlawKnowledgeBase:
    """Knowledge base for AI systems and organizations.

    Reads ``ai-systems.jsonld`` and ``organizations.jsonld`` from ``kb_path``.
    Each file is expected to be a JSON object with an ``"@graph"`` list of
    entries. Entries may carry an ``"_aifr_internal"`` dict holding a
    ``slug`` and a ``displayName``.
    """

    def __init__(self, kb_path: str = "knowledge-base"):
        """Load the knowledge base found under ``kb_path``.

        Args:
            kb_path: Directory containing the two ``.jsonld`` files.
        """
        self.kb_path = Path(kb_path)
        self.systems_data = None
        self.organizations_data = None
        self.slug_map = {}
        self._load_knowledge_base()

    def _read_graph(self, filename: str) -> Dict[str, Any]:
        """Read one knowledge-base file, or return an empty graph if absent.

        Raises:
            OSError: if the file exists but cannot be read.
            ValueError: if the content is not valid JSON/UTF-8 or the
                top-level value is not an object.
        """
        path = self.kb_path / filename
        if not path.exists():
            return {"@graph": []}
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError(f"{filename}: top-level JSON value must be an object")
        return data

    def _load_knowledge_base(self):
        """Load knowledge base files or create minimal fallback data."""
        try:
            self.systems_data = self._read_graph("ai-systems.jsonld")
            self.organizations_data = self._read_graph("organizations.jsonld")
        except (OSError, ValueError):
            # All-or-nothing: one unreadable or malformed file resets both
            # graphs. JSONDecodeError and UnicodeDecodeError are ValueErrors.
            self.systems_data = {"@graph": []}
            self.organizations_data = {"@graph": []}

        self._build_slug_map()

    def _build_slug_map(self):
        """Build slug to system/org mapping."""
        self.slug_map = {}

        # NOTE(review): systems and organizations share one map, so an
        # organization with the same slug as a system replaces it.
        for source in (self.systems_data, self.organizations_data):
            if not source:
                continue
            for entry in source.get("@graph", []):
                slug = (entry.get("_aifr_internal") or {}).get("slug")
                if slug:
                    self.slug_map[slug] = entry

    def find_system_by_name_or_slug(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Find system by name or slug with fuzzy matching.

        An exact slug hit wins. Otherwise the first system whose name or
        display name contains ``identifier``, or is contained in it
        (case-insensitive), is returned.

        Args:
            identifier: Slug or (partial) name to look up.

        Returns:
            The matching knowledge-base entry, or ``None``.
        """
        if not identifier:
            return None

        # Direct slug match
        if identifier in self.slug_map:
            return self.slug_map[identifier]

        # Search by name (case-insensitive partial match)
        needle = identifier.strip().lower()
        if not needle:
            return None

        for system in self.systems_data.get("@graph", []):
            internal = system.get("_aifr_internal") or {}
            labels = (
                (system.get("name") or "").lower(),
                (internal.get("displayName") or "").lower(),
            )
            for label in labels:
                # An empty label is a substring of every string; skip it so
                # an unnamed system does not match every query.
                if label and (needle in label or label in needle):
                    return system

        return None

    def find_organization_by_id(self, org_id: str) -> Optional[Dict[str, Any]]:
        """Find organization by @id URI."""
        for org in self.organizations_data.get("@graph", []):
            if org.get("@id") == org_id:
                return org
        return None

    def get_system_jsonld(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Get clean JSON-LD representation of system with full publisher data.

        Internal (underscore-prefixed) keys are stripped. When the system's
        ``publisher`` references a known organization by ``@id``, the
        reference is replaced by the full, cleaned organization record.

        Returns:
            The JSON-LD dict, or ``None`` if no system matches.
        """
        system = self.find_system_by_name_or_slug(identifier)
        if not system:
            return None

        jsonld_system = clean_internal_fields(system)

        if "@type" not in jsonld_system:
            jsonld_system["@type"] = "schema:SoftwareApplication"

        publisher = system.get("publisher")
        # Only a dict-shaped reference can carry an "@id"; other shapes are
        # left as they are.
        publisher_id = publisher.get("@id") if isinstance(publisher, dict) else None
        if publisher_id:
            org_data = self.find_organization_by_id(publisher_id)
            if org_data:
                publisher_jsonld = clean_internal_fields(org_data)
                if "@type" not in publisher_jsonld:
                    publisher_jsonld["@type"] = "schema:Organization"
                jsonld_system["publisher"] = publisher_jsonld

        return jsonld_system
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def process_raw_report(raw_data: Dict[str, Any]) -> ProcessedAIFlawReport:
    """Convert raw form data to processed report by resolving AI systems.

    Args:
        raw_data: Form payload keyed by form label (or field name).

    Returns:
        A ``ProcessedAIFlawReport`` with resolved systems, placeholder text
        for missing core fields, and the conditional sections that match the
        selected report types. The original payload is attached as
        ``raw_data``.

    Raises:
        pydantic.ValidationError: if ``raw_data`` fails ``RawAIFlawReport``
            validation.
    """
    raw_report = RawAIFlawReport.model_validate(raw_data)
    kb = AIFlawKnowledgeBase()

    # The model accepts both the alias and the field name, so read the ID
    # from the validated model first.
    report_id = raw_report.report_id or raw_data.get("Report ID")
    if not report_id:
        # Content hash used only as a stable identifier, not for security.
        # default=str keeps hashing from failing on non-JSON values.
        payload = json.dumps(raw_data, sort_keys=True, default=str)
        report_id = f"AFL-{hashlib.md5(payload.encode()).hexdigest()[:8]}"

    ai_systems = []
    for system_name in raw_report.systems or []:
        if not system_name or not system_name.strip():
            # A blank entry names no system; skip it so the "Unknown System"
            # fallback below can apply.
            continue
        system_data = kb.find_system_by_name_or_slug(system_name)
        if system_data:
            internal_data = system_data.get("_aifr_internal", {})
            ai_system = AISystem(
                id=system_data.get("@id", f"https://aiflawreports.org/systems/{system_name}"),
                name=system_data.get("name", system_name),
                version=system_data.get("version", ""),
                slug=internal_data.get("slug", system_name),
                display_name=internal_data.get("displayName", system_name),
                system_type="known"
            )
        else:
            # Named on the form but absent from the knowledge base.
            ai_system = AISystem(
                id=f"https://aiflawreports.org/systems/{system_name.replace(' ', '_')}",
                name=system_name,
                version="",
                slug=system_name,
                display_name=system_name,
                system_type="partially_known"
            )
        ai_systems.append(ai_system)

    if not ai_systems:
        ai_systems.append(AISystem(
            id=f"https://aiflawreports.org/reports/{report_id}/unknown-system",
            name="Unknown System",
            version="",
            slug="",
            display_name="Unknown System",
            system_type="unknown",
            description="No specific system identified"
        ))

    # First non-empty description wins; incident text takes priority over
    # flaw text, and short text over detailed text.
    if raw_report.incident_description:
        description = raw_report.incident_description
    elif raw_report.incident_description_detailed:
        description = f"**Detailed Description:**\n{raw_report.incident_description_detailed}"
    elif raw_report.flaw_description:
        description = raw_report.flaw_description
    elif raw_report.flaw_description_detailed:
        description = f"**Detailed Description:**\n{raw_report.flaw_description_detailed}"
    else:
        description = "No description provided"

    policy_violation = raw_report.policy_violation or raw_report.potential_policy_violation or "Not specified"

    incident_data = None
    if "Real-World Incidents" in raw_report.report_types:
        incident_data = {
            "description": raw_report.incident_description,
            "detailed_description": raw_report.incident_description_detailed,
            "locations": raw_report.incident_locations,
            "harm_narrative": raw_report.harm_narrative,
            "submitter_relationship": raw_report.submitter_relationship,
            "submitter_relationship_other": raw_report.submitter_relationship_other
        }

    security_data = None
    if any(rt in raw_report.report_types for rt in ["Malign Actor", "Security Incident Report"]):
        security_data = {
            "attacker_resources": raw_report.attacker_resources or [],
            "attacker_objectives": raw_report.attacker_objectives or [],
            "objective_context": raw_report.objective_context,
            "detection_methods": raw_report.detection_methods or []
        }

    vulnerability_data = None
    if "Vulnerability Report" in raw_report.report_types:
        vulnerability_data = {
            "proof_of_concept": raw_report.proof_of_concept
        }

    hazard_data = None
    if "Hazard Report" in raw_report.report_types:
        hazard_data = {
            "statistical_argument": raw_report.statistical_argument
        }

    processed_report = ProcessedAIFlawReport(
        report_id=report_id,
        ai_systems=ai_systems,
        reporter_id=raw_report.reporter_id,
        session_id=raw_report.session_id,
        flaw_timestamp_start=raw_report.flaw_timestamp_start,
        flaw_description=description,
        policy_violation=policy_violation,
        severity=raw_report.severity or "Unknown",
        prevalence=raw_report.prevalence or "Unknown",
        impacts=raw_report.impacts or [],
        specific_harm_types=raw_report.specific_harm_types or [],
        impacted_stakeholders=raw_report.impacted_stakeholders or [],
        report_types=raw_report.report_types,
        incident_data=incident_data,
        security_data=security_data,
        vulnerability_data=vulnerability_data,
        hazard_data=hazard_data,
        disclosure_intent=raw_report.disclosure_intent,
        disclosure_timeline=raw_report.disclosure_timeline,
        disclosure_channels=raw_report.disclosure_channels or []
    )

    # Attached after construction so the payload is stored as given.
    processed_report.raw_data = raw_data
    return processed_report
|
| 358 |
+
|
| 359 |
+
def _normalized_description(text: Optional[str]) -> str:
|
| 360 |
+
if not text:
|
| 361 |
+
return "No description provided"
|
| 362 |
+
prefix = "**Detailed Description:**"
|
| 363 |
+
t = text.lstrip()
|
| 364 |
+
if t.startswith(prefix):
|
| 365 |
+
t = t[len(prefix):].lstrip()
|
| 366 |
+
return t
|
| 367 |
+
|
| 368 |
+
def serialize_to_jsonld(processed_report: ProcessedAIFlawReport) -> Dict[str, Any]:
    """Build the JSON-LD document for a processed flaw report.

    Known systems are expanded from the knowledge base; systems it cannot
    resolve are emitted as minimal ``schema:SoftwareApplication`` nodes.
    Optional sections (author, incident, security, vulnerability, hazard,
    risk source, context info, timestamps, embargo) are added only when the
    report carries data for them.

    Args:
        processed_report: Report produced by ``process_raw_report``.

    Returns:
        A JSON-LD dict, not compacted.
    """
    kb = AIFlawKnowledgeBase()

    jsonld_systems, system_names = [], []
    for system in processed_report.ai_systems:
        if system.system_type == "unknown":
            jsonld_systems.append({
                "@type": "schema:SoftwareApplication",
                "@id": system.id,
                "name": "Unknown System",
                "description": system.description
            })
            system_names.append("Unknown System")
            continue

        # Prefer the full knowledge-base record; fall back to a minimal node.
        node = kb.get_system_jsonld(system.slug or system.name) or {
            "@type": "schema:SoftwareApplication",
            "@id": system.id,
            "name": system.name,
            "version": system.version
        }
        jsonld_systems.append(node)
        system_names.append(system.display_name)

    jsonld_report = {
        "@context": [
            "https://schema.org/",
            {
                "aifr": "https://aiflawreports.org/schema/",
                "aiSystem": "aifr:aiSystem",
                "severity": "aifr:severity",
                "prevalence": "aifr:prevalence",
                "impacts": "aifr:impacts",
                "reportType": "aifr:reportType",
                "riskSource": "aifr:riskSource",
                "contextInfo": "aifr:contextInfo"
            }
        ],
        "@type": "aifr:AIFlawReport",
        "@id": f"https://aiflawreports.org/reports/{processed_report.report_id}",
        "name": f"AI Flaw Report: {', '.join(system_names)}",
        "description": _normalized_description(processed_report.flaw_description),
        "aiSystem": jsonld_systems,
        "severity": processed_report.severity,
        "prevalence": processed_report.prevalence,
        "impacts": processed_report.impacts,
        "reportType": processed_report.report_types,
        "dateCreated": processed_report.created_at.isoformat(),
        "identifier": processed_report.report_id,
        "aifr:policyViolation": processed_report.policy_violation,
    }

    if processed_report.reporter_id:
        jsonld_report["author"] = {"@type": "schema:Person", "identifier": processed_report.reporter_id}
    if processed_report.session_id:
        jsonld_report["aifr:sessionId"] = processed_report.session_id
    if processed_report.flaw_timestamp_start:
        jsonld_report["aifr:flawTimestamp"] = processed_report.flaw_timestamp_start
    if processed_report.impacted_stakeholders:
        jsonld_report["aifr:impactedStakeholders"] = processed_report.impacted_stakeholders
    if processed_report.specific_harm_types:
        jsonld_report["aifr:specificHarmTypes"] = processed_report.specific_harm_types

    incident_data = processed_report.incident_data
    if incident_data:
        inc = {
            "@type": "aifr:RealWorldIncident",
            "description": incident_data.get("description"),
            "location": incident_data.get("locations"),
            "aifr:harmNarrative": incident_data.get("harm_narrative"),
        }
        if incident_data.get("submitter_relationship"):
            inc["aifr:submitterRelationship"] = incident_data["submitter_relationship"]
        jsonld_report["aifr:incident"] = inc

    security_data = processed_report.security_data
    if security_data:
        sec = {
            "@type": "aifr:SecurityIncident",
            "aifr:attackerResources": security_data.get("attacker_resources", []),
            "aifr:attackerObjectives": security_data.get("attacker_objectives", []),
            "aifr:detectionMethods": security_data.get("detection_methods", [])
        }
        if security_data.get("objective_context"):
            sec["aifr:objectiveContext"] = security_data["objective_context"]
        jsonld_report["aifr:securityAspect"] = sec

    if processed_report.vulnerability_data:
        jsonld_report["aifr:vulnerability"] = {
            "@type": "aifr:Vulnerability",
            "aifr:proofOfConcept": processed_report.vulnerability_data.get("proof_of_concept")
        }

    if processed_report.hazard_data:
        jsonld_report["aifr:hazard"] = {
            "@type": "aifr:Hazard",
            "aifr:statisticalArgument": processed_report.hazard_data.get("statistical_argument")
        }

    disclosure = {"@type": "aifr:DisclosurePlan", "aifr:intent": processed_report.disclosure_intent}
    if processed_report.disclosure_timeline:
        disclosure["aifr:timeline"] = processed_report.disclosure_timeline
    if processed_report.disclosure_channels:
        disclosure["aifr:channels"] = processed_report.disclosure_channels
    jsonld_report["aifr:disclosure"] = disclosure

    rd = processed_report.raw_data
    if rd:
        # Embed the original form payload alongside the structured fields.
        jsonld_report["aifr:raw"] = rd

        # Map Risk Source(s) -> aifr:riskSource
        rs = rd.get("Risk Source(s)")
        # The raw payload is not validated, so only a dict can be mapped; any
        # other shape stays available under aifr:raw.
        if rs and isinstance(rs, dict):
            jsonld_report["aifr:riskSource"] = {
                "@type": "aifr:RiskSourceAnalysis",
                "aifr:responsibleFactors": rs.get("Responsible Factors", []),
                "aifr:responsibleFactorsSubcategories": rs.get("Responsible Factors Subcategories", {}),
                "aifr:responsibleFactorsContext": rs.get("Responsible Factors Context", "")
            }

        # Map Context Info -> aifr:contextInfo
        if rd.get("Context Info") not in (None, ""):
            jsonld_report["aifr:contextInfo"] = rd["Context Info"]

        # Map Submission Timestamp -> aifr:submissionTimestamp
        if rd.get("Submission Timestamp"):
            jsonld_report["aifr:submissionTimestamp"] = rd["Submission Timestamp"]

        if rd.get("Embargo Request"):
            jsonld_report["aifr:disclosure"]["aifr:embargoRequest"] = rd["Embargo Request"]

    return jsonld_report
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
def generate_machine_readable_output(form_data: Dict[str, Any]) -> str:
    """
    Main function to replace the original manual JSON-LD generation
    Uses Pydantic validation, knowledge base enrichment, and proper JSON-LD structure

    Args:
        form_data: Raw form payload.

    Returns:
        A JSON string (indent 2): the compacted JSON-LD report when
        compaction succeeds, the uncompacted report when it does not, or a
        minimal ``Report`` stub carrying ``aifr:processingError`` when
        processing itself fails.
    """
    import logging  # function-scope import keeps the file's import block untouched

    logger = logging.getLogger(__name__)

    try:
        processed_report = process_raw_report(form_data)

        jsonld_report = serialize_to_jsonld(processed_report)

        try:
            compacted = jsonld.compact(jsonld_report, jsonld_report["@context"])
            return json.dumps(compacted, indent=2)
        except Exception:
            # Best effort: compaction is optional, so fall back to the
            # uncompacted document, but leave a trace of the failure.
            logger.warning("JSON-LD compaction failed; returning uncompacted report", exc_info=True)
            return json.dumps(jsonld_report, indent=2)

    except Exception as e:
        logger.warning("Flaw report processing failed; returning fallback stub", exc_info=True)
        # form_data itself may be the cause (e.g. not a dict); never let the
        # fallback raise because of it.
        source = form_data if isinstance(form_data, dict) else {}
        return json.dumps({
            "@context": "https://schema.org/",
            "@type": "Report",
            "@id": f"https://aiflawreports.org/reports/{source.get('Report ID', 'unknown')}",
            "name": "AI Flaw Report",
            "description": source.get("Flaw Description", ""),
            "dateCreated": datetime.now(timezone.utc).isoformat(),
            "aifr:processingError": str(e)
        }, indent=2)
|
| 526 |
+
|
| 527 |
+
def validate_jsonld_output(jsonld_string: str) -> tuple[bool, Optional[str]]:
    """
    Validate the generated JSON-LD
    Returns (is_valid, error_message)

    The string must parse as a JSON object containing ``@context``,
    ``@type`` and ``@id``, and must survive ``jsonld.expand``.
    ``error_message`` is ``None`` when valid.
    """
    try:
        data = json.loads(jsonld_string)

        if not isinstance(data, dict):
            return False, "Top-level JSON-LD value must be an object"

        # Report the first missing keyword, in this fixed order.
        for keyword in ("@context", "@type", "@id"):
            if keyword not in data:
                return False, f"Missing {keyword}"

        try:
            # Only success or failure matters; the expanded form is discarded.
            jsonld.expand(data)
            return True, None
        except Exception as e:
            return False, f"JSON-LD expansion error: {e}"

    except json.JSONDecodeError as e:
        return False, f"Invalid JSON: {e}"
    except Exception as e:
        return False, f"Validation error: {e}"
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
def update_storage_interface_for_jsonld(form_data: Dict[str, Any]):
    """
    Enhanced storage function that generates both formats
    Can be integrated into existing storage providers

    Args:
        form_data: Raw form payload.

    Returns:
        A dict with the original ``form_data``, the JSON-LD document as a
        parsed object (``jsonld``) and as text (``jsonld_string``), plus
        ``validation_status`` and ``validation_error``.
    """
    import logging  # function-scope import keeps the file's import block untouched

    jsonld_output = generate_machine_readable_output(form_data)

    is_valid, error = validate_jsonld_output(jsonld_output)

    if not is_valid:
        # Validation failure is reported, not raised: the record is still
        # returned with its status so the caller can decide what to do.
        logging.getLogger(__name__).warning("JSON-LD validation warning: %s", error)

    return {
        "form_data": form_data,
        "jsonld": json.loads(jsonld_output),
        "jsonld_string": jsonld_output,
        "validation_status": is_valid,
        "validation_error": error
    }
|