add
- .gitignore +1 -0
- app.py +119 -33
- msr.py +174 -16
.gitignore
CHANGED
@@ -1,3 +1,4 @@
+*.claude
 *.env
 *.venv
 *.ipynb
app.py
CHANGED
@@ -228,6 +228,7 @@ def fetch_issues_with_time_partition(base_query, start_date, end_date, headers,
    """
    Fetch issues within a specific time range using time-based partitioning.
    Recursively splits the time range if hitting the 1000-result limit.
+    Supports splitting by day, hour, minute, and second as needed.

    Args:
        debug_limit: If set, stops fetching after this many issues (for testing)
@@ -235,9 +236,27 @@ def fetch_issues_with_time_partition(base_query, start_date, end_date, headers,

    Returns the number of issues found in this time partition.
    """
+    # Calculate time difference
+    time_diff = end_date - start_date
+    total_seconds = time_diff.total_seconds()
+
+    # Determine granularity and format dates accordingly
+    if total_seconds >= 86400:  # >= 1 day
+        # Use day granularity (YYYY-MM-DD)
+        start_str = start_date.strftime('%Y-%m-%d')
+        end_str = end_date.strftime('%Y-%m-%d')
+    elif total_seconds >= 3600:  # >= 1 hour but < 1 day
+        # Use hour granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:00:00Z')
+        end_str = end_date.strftime('%Y-%m-%dT%H:59:59Z')
+    elif total_seconds >= 60:  # >= 1 minute but < 1 hour
+        # Use minute granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:%M:00Z')
+        end_str = end_date.strftime('%Y-%m-%dT%H:%M:59Z')
+    else:  # < 1 minute
+        # Use second granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
+        end_str = end_date.strftime('%Y-%m-%dT%H:%M:%SZ')

    # Add date range to query
    query = f'{base_query} created:{start_str}..{end_str}'
@@ -291,30 +310,24 @@ def fetch_issues_with_time_partition(base_query, start_date, end_date, headers,
            if total_count > 1000 and page == 10:
                print(f"{indent} ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")

+                # Determine how to split based on time range duration
+                if total_seconds < 2:  # Less than 2 seconds - can't split further
+                    print(f"{indent} ⚠️ Cannot split further (range < 2 seconds). Some results may be missing.")
+                    break
+
+                elif total_seconds < 120:  # Less than 2 minutes - split by seconds
+                    # Split into 2-4 parts depending on range
+                    num_splits = min(4, max(2, int(total_seconds / 30)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]

                    total_from_splits = 0
+                    for i in range(num_splits):
                        split_start = split_dates[i]
                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 second to start)
                        if i > 0:
+                            split_start = split_start + timedelta(seconds=1)

                        count = fetch_issues_with_time_partition(
                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
@@ -322,19 +335,92 @@ def fetch_issues_with_time_partition(base_query, start_date, end_date, headers,
                        total_from_splits += count

                    return total_from_splits

+                elif total_seconds < 7200:  # Less than 2 hours - split by minutes
+                    # Split into 2-4 parts
+                    num_splits = min(4, max(2, int(total_seconds / 1800)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
+
+                    total_from_splits = 0
+                    for i in range(num_splits):
+                        split_start = split_dates[i]
+                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 minute to start)
+                        if i > 0:
+                            split_start = split_start + timedelta(minutes=1)
+
+                        count = fetch_issues_with_time_partition(
+                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        total_from_splits += count
+
+                    return total_from_splits
+
+                elif total_seconds < 172800:  # Less than 2 days - split by hours
+                    # Split into 2-4 parts
+                    num_splits = min(4, max(2, int(total_seconds / 43200)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
+
+                    total_from_splits = 0
+                    for i in range(num_splits):
+                        split_start = split_dates[i]
+                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 hour to start)
+                        if i > 0:
+                            split_start = split_start + timedelta(hours=1)
+
+                        count = fetch_issues_with_time_partition(
+                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        total_from_splits += count
+
+                    return total_from_splits
+
+                else:  # 2+ days - split by days
+                    days_diff = time_diff.days
+
+                    # Use aggressive splitting for large ranges or deep recursion
+                    # Split into 4 parts if range is > 30 days, otherwise split in half
+                    if days_diff > 30 or depth > 5:
+                        # Split into 4 parts for more aggressive partitioning
+                        quarter_diff = time_diff / 4
+                        split_dates = [
+                            start_date,
+                            start_date + quarter_diff,
+                            start_date + quarter_diff * 2,
+                            start_date + quarter_diff * 3,
+                            end_date
+                        ]
+
+                        total_from_splits = 0
+                        for i in range(4):
+                            split_start = split_dates[i]
+                            split_end = split_dates[i + 1]
+                            # Avoid overlapping ranges
+                            if i > 0:
+                                split_start = split_start + timedelta(days=1)
+
+                            count = fetch_issues_with_time_partition(
+                                base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                            )
+                            total_from_splits += count
+
+                        return total_from_splits
+                    else:
+                        # Binary split for smaller ranges
+                        mid_date = start_date + time_diff / 2
+
+                        # Recursively fetch both halves
+                        count1 = fetch_issues_with_time_partition(
+                            base_query, start_date, mid_date, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        count2 = fetch_issues_with_time_partition(
+                            base_query, mid_date + timedelta(days=1), end_date, headers, issues_by_id, debug_limit, depth + 1
+                        )

+                        return count1 + count2

            # Normal pagination: check if there are more pages
            if len(items) < per_page or page >= 10:
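
For orientation, the partitioned fetch above is meant to be driven by a single top-level call that covers the whole collection window and shares one deduplication dict across all recursive partitions. The sketch below shows one way such a call might look; the query string, authentication header, and date range are illustrative assumptions, not values taken from this commit.

from datetime import datetime

def collect_issues(token):
    # Hypothetical driver; only fetch_issues_with_time_partition comes from this commit.
    headers = {
        'Authorization': f'token {token}',            # assumed token-based auth
        'Accept': 'application/vnd.github+json',
    }
    base_query = 'repo:octocat/hello-world is:issue'  # placeholder search query
    issues_by_id = {}  # shared dict: deduplicates issues across overlapping partitions

    total = fetch_issues_with_time_partition(
        base_query,
        datetime(2023, 1, 1),
        datetime(2023, 12, 31),
        headers,
        issues_by_id,
    )
    print(f"Collected {len(issues_by_id)} unique issues ({total} reported across partitions)")
    return list(issues_by_id.values())
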
msr.py
CHANGED
@@ -224,17 +224,54 @@ def request_with_backoff(method, url, *, headers=None, params=None, json_body=No
    return None


+def fetch_issues_with_time_partition(base_query, start_date, end_date, headers, issues_by_id, debug_limit=None, depth=0):
+    """
+    Fetch issues within a specific time range using time-based partitioning.
+    Recursively splits the time range if hitting the 1000-result limit.
+    Supports splitting by day, hour, minute, and second as needed.
+
+    Args:
+        debug_limit: If set, stops fetching after this many issues (for testing)
+        depth: Current recursion depth (for tracking)
+
+    Returns the number of issues found in this time partition.
+    """
+    # Calculate time difference
+    time_diff = end_date - start_date
+    total_seconds = time_diff.total_seconds()
+
+    # Determine granularity and format dates accordingly
+    if total_seconds >= 86400:  # >= 1 day
+        # Use day granularity (YYYY-MM-DD)
+        start_str = start_date.strftime('%Y-%m-%d')
+        end_str = end_date.strftime('%Y-%m-%d')
+    elif total_seconds >= 3600:  # >= 1 hour but < 1 day
+        # Use hour granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:00:00Z')
+        end_str = end_date.strftime('%Y-%m-%dT%H:59:59Z')
+    elif total_seconds >= 60:  # >= 1 minute but < 1 hour
+        # Use minute granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:%M:00Z')
+        end_str = end_date.strftime('%Y-%m-%dT%H:%M:59Z')
+    else:  # < 1 minute
+        # Use second granularity (YYYY-MM-DDTHH:MM:SSZ)
+        start_str = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
+        end_str = end_date.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    # Add date range to query
    query = f'{base_query} created:{start_str}..{end_str}'
+
+    indent = " " + " " * depth
+    print(f"{indent}Searching range {start_str} to {end_str}...")
+
    page = 1
    per_page = 100
    total_in_partition = 0
+
    while True:
+        # Check debug limit
        if debug_limit is not None and total_in_partition >= debug_limit:
+            print(f"{indent} 🐛 DEBUG MODE: Reached limit of {debug_limit} issues, stopping...")
            return total_in_partition
        url = 'https://api.github.com/search/issues'
        params = {
@@ -244,40 +281,161 @@ def fetch_issues_with_time_partition(base_query, start_date, end_date, headers,
            'sort': 'created',
            'order': 'asc'
        }
+
        try:
            response = request_with_backoff('GET', url, headers=headers, params=params)
            if response is None:
+                print(f"{indent} Error: retries exhausted for range {start_str} to {end_str}")
                return total_in_partition
+
            if response.status_code != 200:
+                print(f"{indent} Error: HTTP {response.status_code} for range {start_str} to {end_str}")
                return total_in_partition
+
            data = response.json()
            total_count = data.get('total_count', 0)
            items = data.get('items', [])
+
            if not items:
                break
+
+            # Add issues to global dict
            for issue in items:
                issue_id = issue.get('id')
                if issue_id and issue_id not in issues_by_id:
                    issues_by_id[issue_id] = issue
                    total_in_partition += 1
+
+            # Check if we hit the 1000-result limit
            if total_count > 1000 and page == 10:
+                print(f"{indent} ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")
+
+                # Determine how to split based on time range duration
+                if total_seconds < 2:  # Less than 2 seconds - can't split further
+                    print(f"{indent} ⚠️ Cannot split further (range < 2 seconds). Some results may be missing.")
+                    break
+
+                elif total_seconds < 120:  # Less than 2 minutes - split by seconds
+                    # Split into 2-4 parts depending on range
+                    num_splits = min(4, max(2, int(total_seconds / 30)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
+
+                    total_from_splits = 0
+                    for i in range(num_splits):
+                        split_start = split_dates[i]
+                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 second to start)
+                        if i > 0:
+                            split_start = split_start + timedelta(seconds=1)
+
+                        count = fetch_issues_with_time_partition(
+                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        total_from_splits += count
+
+                    return total_from_splits
+
+                elif total_seconds < 7200:  # Less than 2 hours - split by minutes
+                    # Split into 2-4 parts
+                    num_splits = min(4, max(2, int(total_seconds / 1800)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
+
+                    total_from_splits = 0
+                    for i in range(num_splits):
+                        split_start = split_dates[i]
+                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 minute to start)
+                        if i > 0:
+                            split_start = split_start + timedelta(minutes=1)
+
+                        count = fetch_issues_with_time_partition(
+                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        total_from_splits += count
+
+                    return total_from_splits
+
+                elif total_seconds < 172800:  # Less than 2 days - split by hours
+                    # Split into 2-4 parts
+                    num_splits = min(4, max(2, int(total_seconds / 43200)))
+                    split_duration = time_diff / num_splits
+                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]
+
+                    total_from_splits = 0
+                    for i in range(num_splits):
+                        split_start = split_dates[i]
+                        split_end = split_dates[i + 1]
+                        # Avoid overlapping ranges (add 1 hour to start)
+                        if i > 0:
+                            split_start = split_start + timedelta(hours=1)
+
+                        count = fetch_issues_with_time_partition(
+                            base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        total_from_splits += count
+
+                    return total_from_splits
+
+                else:  # 2+ days - split by days
+                    days_diff = time_diff.days
+
+                    # Use aggressive splitting for large ranges or deep recursion
+                    # Split into 4 parts if range is > 30 days, otherwise split in half
+                    if days_diff > 30 or depth > 5:
+                        # Split into 4 parts for more aggressive partitioning
+                        quarter_diff = time_diff / 4
+                        split_dates = [
+                            start_date,
+                            start_date + quarter_diff,
+                            start_date + quarter_diff * 2,
+                            start_date + quarter_diff * 3,
+                            end_date
+                        ]
+
+                        total_from_splits = 0
+                        for i in range(4):
+                            split_start = split_dates[i]
+                            split_end = split_dates[i + 1]
+                            # Avoid overlapping ranges
+                            if i > 0:
+                                split_start = split_start + timedelta(days=1)
+
+                            count = fetch_issues_with_time_partition(
+                                base_query, split_start, split_end, headers, issues_by_id, debug_limit, depth + 1
+                            )
+                            total_from_splits += count
+
+                        return total_from_splits
+                    else:
+                        # Binary split for smaller ranges
+                        mid_date = start_date + time_diff / 2
+
+                        # Recursively fetch both halves
+                        count1 = fetch_issues_with_time_partition(
+                            base_query, start_date, mid_date, headers, issues_by_id, debug_limit, depth + 1
+                        )
+                        count2 = fetch_issues_with_time_partition(
+                            base_query, mid_date + timedelta(days=1), end_date, headers, issues_by_id, debug_limit, depth + 1
+                        )
+
+                        return count1 + count2
+
+            # Normal pagination: check if there are more pages
            if len(items) < per_page or page >= 10:
                break
+
            page += 1
+            time.sleep(0.5)  # Courtesy delay between pages
+
        except Exception as e:
+            print(f"{indent} Error fetching range {start_str} to {end_str}: {str(e)}")
            return total_in_partition
+
    if total_in_partition > 0:
+        print(f"{indent} ✓ Found {total_in_partition} issues in range {start_str} to {end_str}")
+
    return total_in_partition
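
As a rough check on the splitting arithmetic added in both files, the standalone sketch below mirrors the committed thresholds and shows how many sub-ranges a search that hit the 1000-result cap would be divided into. The helper name and example durations are illustrative only, not part of the commit.

from datetime import timedelta

def planned_splits(total_seconds, days=0, depth=0):
    # Mirrors the thresholds in fetch_issues_with_time_partition (illustrative only).
    if total_seconds < 2:
        return 0                                            # cannot split further
    if total_seconds < 120:
        return min(4, max(2, int(total_seconds / 30)))      # split by seconds
    if total_seconds < 7200:
        return min(4, max(2, int(total_seconds / 1800)))    # split by minutes
    if total_seconds < 172800:
        return min(4, max(2, int(total_seconds / 43200)))   # split by hours
    return 4 if (days > 30 or depth > 5) else 2             # split by days: quartered or halved

for d in (timedelta(seconds=45), timedelta(minutes=90), timedelta(hours=25), timedelta(days=120)):
    print(d, '->', planned_splits(d.total_seconds(), days=d.days))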