Spaces:
Running
Running
refine
Browse files
app.py
CHANGED
|
@@ -223,13 +223,18 @@ def extract_review_metadata_from_bigquery(review_row):
|
|
| 223 |
merged_at = getattr(review_row, 'merged_at', None)
|
| 224 |
closed_at = getattr(review_row, 'closed_at', None)
|
| 225 |
|
| 226 |
-
# Convert to ISO format if datetime
|
| 227 |
if hasattr(reviewed_at, 'isoformat'):
|
| 228 |
reviewed_at = reviewed_at.isoformat()
|
|
|
|
|
|
|
| 229 |
if merged_at and hasattr(merged_at, 'isoformat'):
|
| 230 |
merged_at = merged_at.isoformat()
|
|
|
|
|
|
|
| 231 |
if closed_at and hasattr(closed_at, 'isoformat'):
|
| 232 |
closed_at = closed_at.isoformat()
|
|
|
|
| 233 |
|
| 234 |
return {
|
| 235 |
'url': url,
|
|
|
|
| 223 |
merged_at = getattr(review_row, 'merged_at', None)
|
| 224 |
closed_at = getattr(review_row, 'closed_at', None)
|
| 225 |
|
| 226 |
+
# Convert to ISO format if datetime and normalize
|
| 227 |
if hasattr(reviewed_at, 'isoformat'):
|
| 228 |
reviewed_at = reviewed_at.isoformat()
|
| 229 |
+
reviewed_at = normalize_date_format(reviewed_at) if reviewed_at else None
|
| 230 |
+
|
| 231 |
if merged_at and hasattr(merged_at, 'isoformat'):
|
| 232 |
merged_at = merged_at.isoformat()
|
| 233 |
+
merged_at = normalize_date_format(merged_at) if merged_at else None
|
| 234 |
+
|
| 235 |
if closed_at and hasattr(closed_at, 'isoformat'):
|
| 236 |
closed_at = closed_at.isoformat()
|
| 237 |
+
closed_at = normalize_date_format(closed_at) if closed_at else None
|
| 238 |
|
| 239 |
return {
|
| 240 |
'url': url,
|
msr.py
CHANGED
|
@@ -51,6 +51,41 @@ def save_jsonl(filename, data):
|
|
| 51 |
f.write(json.dumps(item) + '\n')
|
| 52 |
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
def get_hf_token():
|
| 55 |
"""Get HuggingFace token from environment variables."""
|
| 56 |
token = os.getenv('HF_TOKEN')
|
|
@@ -267,20 +302,23 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
|
|
| 267 |
|
| 268 |
for row in results:
|
| 269 |
reviewer = row.reviewer
|
| 270 |
-
|
| 271 |
-
# Convert datetime objects to ISO strings
|
| 272 |
reviewed_at = row.reviewed_at
|
| 273 |
if hasattr(reviewed_at, 'isoformat'):
|
| 274 |
reviewed_at = reviewed_at.isoformat()
|
| 275 |
-
|
|
|
|
| 276 |
merged_at = row.merged_at
|
| 277 |
if hasattr(merged_at, 'isoformat'):
|
| 278 |
merged_at = merged_at.isoformat()
|
| 279 |
-
|
|
|
|
| 280 |
closed_at = row.closed_at
|
| 281 |
if hasattr(closed_at, 'isoformat'):
|
| 282 |
closed_at = closed_at.isoformat()
|
| 283 |
-
|
|
|
|
| 284 |
metadata_by_agent[reviewer].append({
|
| 285 |
'url': row.url,
|
| 286 |
'reviewed_at': reviewed_at,
|
|
|
|
| 51 |
f.write(json.dumps(item) + '\n')
|
| 52 |
|
| 53 |
|
| 54 |
+
def normalize_date_format(date_string):
    """
    Convert a date string to standardized ISO 8601 UTC format with a Z suffix.

    Accepts both 'T' and whitespace-separated (spaces, tabs, newlines)
    date/time forms, a trailing 'Z', and hour-only UTC offsets such as '+00'.
    Fractional seconds are dropped. A timestamp that carries a UTC offset is
    converted to UTC before formatting, so the 'Z' suffix is always truthful;
    a timestamp without an offset is formatted as-is (treated as UTC).

    Examples:
    - 2025-10-15T23:23:47.983068 -> 2025-10-15T23:23:47Z
    - 2025-06-17 21:21:07+00 -> 2025-06-17T21:21:07Z
    - 2025-06-17 21:21:07+05:30 -> 2025-06-17T15:51:07Z

    Args:
        date_string: Date/time text to normalize.

    Returns:
        'N/A' when the input is empty or already 'N/A'; the normalized
        'YYYY-MM-DDTHH:MM:SSZ' string on success; otherwise the input is
        returned unchanged after a warning is printed.
    """
    if not date_string or date_string == 'N/A':
        return 'N/A'

    try:
        import re

        # Collapse each run of whitespace (spaces, newlines, tabs) between the
        # date and time parts into the ISO 'T' separator.
        candidate = re.sub(r'\s+', 'T', date_string.strip())

        # Complete an hour-only timezone offset (+00 / -05 -> +00:00 / -05:00).
        # Requiring a time part ('T...') keeps a date-only value such as
        # '2025-10-15' from having its day mistaken for an offset.
        if re.search(r'T.*[+-]\d{2}$', candidate):
            candidate += ':00'

        # Parse the date string (handles both with and without microseconds).
        # 'Z' is rewritten because older fromisoformat() versions reject it.
        dt = datetime.fromisoformat(candidate.replace('Z', '+00:00'))

        # Shift offset-aware values to UTC so the 'Z' suffix is correct.
        offset = dt.utcoffset()
        if offset is not None:
            dt = dt.replace(tzinfo=None) - offset

        # Convert to standardized format
        return dt.strftime('%Y-%m-%dT%H:%M:%SZ')
    except (AttributeError, OverflowError, TypeError, ValueError) as e:
        # Best effort: report the problem and hand back the caller's value
        # untouched rather than a half-rewritten string.
        print(f"Warning: Could not parse date '{date_string}': {e}")
        return date_string
|
| 87 |
+
|
| 88 |
+
|
| 89 |
def get_hf_token():
|
| 90 |
"""Get HuggingFace token from environment variables."""
|
| 91 |
token = os.getenv('HF_TOKEN')
|
|
|
|
| 302 |
|
| 303 |
for row in results:
|
| 304 |
reviewer = row.reviewer
|
| 305 |
+
|
| 306 |
+
# Convert datetime objects to ISO strings and normalize
|
| 307 |
reviewed_at = row.reviewed_at
|
| 308 |
if hasattr(reviewed_at, 'isoformat'):
|
| 309 |
reviewed_at = reviewed_at.isoformat()
|
| 310 |
+
reviewed_at = normalize_date_format(reviewed_at) if reviewed_at else None
|
| 311 |
+
|
| 312 |
merged_at = row.merged_at
|
| 313 |
if hasattr(merged_at, 'isoformat'):
|
| 314 |
merged_at = merged_at.isoformat()
|
| 315 |
+
merged_at = normalize_date_format(merged_at) if merged_at else None
|
| 316 |
+
|
| 317 |
closed_at = row.closed_at
|
| 318 |
if hasattr(closed_at, 'isoformat'):
|
| 319 |
closed_at = closed_at.isoformat()
|
| 320 |
+
closed_at = normalize_date_format(closed_at) if closed_at else None
|
| 321 |
+
|
| 322 |
metadata_by_agent[reviewer].append({
|
| 323 |
'url': row.url,
|
| 324 |
'reviewed_at': reviewed_at,
|