import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter
import json
import os
import time
import tempfile
import requests
from datetime import datetime, timezone, timedelta
from collections import defaultdict
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.errors import HfHubHTTPError
from datasets import load_dataset, Dataset
import threading
import backoff
from dotenv import load_dotenv
import pandas as pd
import random
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from google.cloud import bigquery

# Load environment variables
load_dotenv()

# =============================================================================
# CONFIGURATION
# =============================================================================

AGENTS_REPO = "SWE-Arena/agent_metadata"  # HuggingFace dataset for agent metadata
REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"  # HuggingFace dataset for review metadata
LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata"  # HuggingFace dataset for leaderboard data
LEADERBOARD_TIME_FRAME_DAYS = 180  # Time frame for constructing leaderboard
UPDATE_TIME_FRAME_DAYS = 30  # Time frame for mining new reviews

LEADERBOARD_COLUMNS = [
    ("Agent Name", "string"),
    ("Website", "string"),
    ("Total Reviews", "number"),
    ("Merged PRs", "number"),
    ("Acceptance Rate (%)", "number"),
]

# =============================================================================
# JSONL FILE OPERATIONS
# =============================================================================

def load_jsonl(filename):
    """Load JSONL file and return list of dictionaries."""
    if not os.path.exists(filename):
        return []
    
    data = []
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                try:
                    entry = json.loads(line)
                    data.append(entry)
                except json.JSONDecodeError as e:
                    print(f"Warning: Skipping invalid JSON line: {e}")
    return data


def save_jsonl(filename, data):
    """Save list of dictionaries to JSONL file."""
    with open(filename, 'w', encoding='utf-8') as f:
        for item in data:
            f.write(json.dumps(item) + '\n')


def cache_to_dict(cache_list):
    """Convert list of cache entries to dictionary by identifier."""
    return {entry['github_identifier']: entry for entry in cache_list}


def dict_to_cache(cache_dict):
    """Convert dictionary back to list of values."""
    return list(cache_dict.values())


def normalize_date_format(date_string):
    """
    Convert date strings to standardized ISO 8601 format with Z suffix.
    Handles both old format (2025-10-15T23:23:47.983068) and new format (2025-10-15T23:23:47Z).
    """
    if not date_string or date_string == 'N/A':
        return 'N/A'

    try:
        # Replace space with 'T' for ISO format compatibility
        date_string = date_string.replace(' ', 'T')

        # Fix incomplete timezone offset (+00 or -00 -> +00:00 or -00:00)
        if date_string[-3:-2] in ('+', '-') and ':' not in date_string[-3:]:
            date_string = date_string + ':00'

        # Parse the date string (handles both with and without microseconds)
        dt = datetime.fromisoformat(date_string.replace('Z', '+00:00'))

        # Convert to standardized format
        return dt.strftime('%Y-%m-%dT%H:%M:%SZ')
    except Exception as e:
        print(f"Warning: Could not parse date '{date_string}': {e}")
        return date_string
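
# Illustrative behaviour of normalize_date_format (shown as comments, not executed):
#   normalize_date_format('2025-10-15 23:23:47.983068')  -> '2025-10-15T23:23:47Z'
#   normalize_date_format('2025-10-15T23:23:47+00')      -> '2025-10-15T23:23:47Z'
#   normalize_date_format('N/A')                         -> 'N/A'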


# =============================================================================
# HUGGINGFACE API WRAPPERS WITH BACKOFF
# =============================================================================

def is_rate_limit_error(e):
    """Check if exception is a HuggingFace rate limit error (429)."""
    if isinstance(e, HfHubHTTPError):
        return e.response.status_code == 429
    return False


@backoff.on_exception(
    backoff.expo,
    HfHubHTTPError,
    max_tries=8,
    base=300,
    max_value=3600,
    giveup=lambda e: not is_rate_limit_error(e),
    on_backoff=lambda details: print(
        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
    )
)
def upload_large_folder_with_backoff(api, **kwargs):
    """Wrapper for api.upload_large_folder() with exponential backoff for rate limits."""
    return api.upload_large_folder(**kwargs)


@backoff.on_exception(
    backoff.expo,
    HfHubHTTPError,
    max_tries=8,
    base=300,
    max_value=3600,
    giveup=lambda e: not is_rate_limit_error(e),
    on_backoff=lambda details: print(
        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
    )
)
def list_repo_files_with_backoff(api, **kwargs):
    """Wrapper for api.list_repo_files() with exponential backoff for rate limits."""
    return api.list_repo_files(**kwargs)


@backoff.on_exception(
    backoff.expo,
    HfHubHTTPError,
    max_tries=8,
    base=300,
    max_value=3600,
    giveup=lambda e: not is_rate_limit_error(e),
    on_backoff=lambda details: print(
        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
    )
)
def hf_hub_download_with_backoff(**kwargs):
    """Wrapper for hf_hub_download() with exponential backoff for rate limits."""
    return hf_hub_download(**kwargs)


@backoff.on_exception(
    backoff.expo,
    HfHubHTTPError,
    max_tries=8,
    base=300,
    max_value=3600,
    giveup=lambda e: not is_rate_limit_error(e),
    on_backoff=lambda details: print(
        f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
    )
)
def upload_file_with_backoff(api, **kwargs):
    """Wrapper for api.upload_file() with exponential backoff for rate limits."""
    return api.upload_file(**kwargs)
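
# Illustrative use of the backoff wrappers above (a sketch; the file name is a placeholder):
#   api = HfApi()
#   upload_file_with_backoff(
#       api,
#       path_or_fileobj="leaderboard.json",
#       path_in_repo="leaderboard.json",
#       repo_id=LEADERBOARD_REPO,
#       repo_type="dataset",
#   )
# Each wrapper retries only on HTTP 429 (rate-limit) responses; other HfHubHTTPErrors are
# re-raised immediately because of the `giveup` predicate.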


# =============================================================================
# BIGQUERY FUNCTIONS
# =============================================================================

def get_bigquery_client():
    """
    Initialize BigQuery client using credentials from environment variable.

    Expects GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable containing
    the service account JSON credentials as a string.
    """
    # Get the JSON content from environment variable
    creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')

    if creds_json:
        # Create a temporary file to store credentials
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
            temp_file.write(creds_json)
            temp_path = temp_file.name

        # Set environment variable to point to temp file
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = temp_path

        # Initialize BigQuery client
        client = bigquery.Client()

        # Clean up temp file
        os.unlink(temp_path)

        return client
    else:
        raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
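
# Illustrative usage (a sketch): assumes GOOGLE_APPLICATION_CREDENTIALS_JSON holds the full
# service-account JSON string; the agent identifier below is a placeholder.
#   client = get_bigquery_client()
#   rows = fetch_reviews_from_bigquery(client, 'example-bot[bot]',
#                                      datetime.now(timezone.utc) - timedelta(days=UPDATE_TIME_FRAME_DAYS),
#                                      datetime.now(timezone.utc))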


def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50, upload_immediately=True):
    """
    Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
    Splits agents into smaller batches to avoid performance issues with large queries.

    Args:
        client: BigQuery client instance
        identifiers: List of GitHub usernames/bot identifiers
        start_date: Start datetime (timezone-aware)
        end_date: End datetime (timezone-aware)
        batch_size: Number of agents to process per batch (default: 50)
        upload_immediately: If True, upload each batch to HuggingFace immediately after processing (default: True)

    Returns:
        Dictionary mapping agent identifier to list of PR metadata
    """
    print(f"\nπŸ” Using BATCHED approach: {len(identifiers)} agents in batches of {batch_size}")

    # Log upload mode
    if upload_immediately:
        print(f"   📤 Upload mode: IMMEDIATE (upload after each batch)")
    else:
        print(f"   📤 Upload mode: DEFERRED (upload after all batches complete)")

    # Split identifiers into batches
    batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
    total_batches = len(batches)

    print(f"   Total batches: {total_batches}")

    # Collect results from all batches
    all_metadata = {}
    successful_batches = 0
    failed_batches = 0

    for batch_num, batch_identifiers in enumerate(batches, 1):
        print(f"\nπŸ“¦ Processing batch {batch_num}/{total_batches} ({len(batch_identifiers)} agents)...")

        try:
            # Query this batch - process each agent in the batch
            batch_results = {}
            for identifier in batch_identifiers:
                review_rows = fetch_reviews_from_bigquery(client, identifier, start_date, end_date)

                # Extract metadata
                metadata_list = []
                seen_prs = set()
                for row in review_rows:
                    url = row.url
                    if url in seen_prs:
                        continue
                    seen_prs.add(url)

                    metadata = extract_review_metadata_from_bigquery(row)
                    metadata_list.append(metadata)

                if metadata_list:
                    all_metadata[identifier] = metadata_list
                    batch_results[identifier] = metadata_list

            successful_batches += 1
            print(f"   βœ“ Batch {batch_num}/{total_batches} complete: {len(batch_identifiers)} agents processed")

            # Upload immediately after this batch if enabled
            if upload_immediately and batch_results:
                print(f"\n   πŸ“€ Uploading batch {batch_num}/{total_batches} results to HuggingFace...")
                upload_success = 0
                upload_errors = 0

                for identifier, metadata_list in batch_results.items():
                    if metadata_list:
                        if save_review_metadata_to_hf(metadata_list, identifier):
                            upload_success += 1
                        else:
                            upload_errors += 1

                print(f"   βœ“ Batch {batch_num}/{total_batches} upload complete ({upload_success} agents uploaded, {upload_errors} errors)")

        except Exception as e:
            failed_batches += 1
            print(f"   βœ— Batch {batch_num}/{total_batches} failed: {str(e)}")
            print(f"   Continuing with remaining batches...")
            continue

    print(f"\nπŸ“Š Batching Summary:")
    print(f"   Total batches: {total_batches}")
    print(f"   Successful: {successful_batches}")
    print(f"   Failed: {failed_batches}")
    print(f"   Total agents with data: {len(all_metadata)}")

    return all_metadata
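
# Illustrative call (a sketch; `identifiers` is a placeholder list of agent logins):
#   client = get_bigquery_client()
#   end = datetime.now(timezone.utc)
#   start = end - timedelta(days=UPDATE_TIME_FRAME_DAYS)
#   metadata_by_agent = fetch_all_pr_metadata_batched(client, identifiers, start, end,
#                                                     batch_size=50, upload_immediately=True)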


def fetch_reviews_from_bigquery(client, identifier, start_date, end_date):
    """
    Fetch PR review events from GitHub Archive for a SINGLE agent.

    NOTE: This function is designed for querying a single agent at a time.
    For querying multiple agents efficiently, use fetch_all_pr_metadata_batched() instead.

    Queries githubarchive.day.YYYYMMDD tables for PullRequestReviewEvent where
    actor.login matches the agent identifier, and joins with PR status.

    Args:
        client: BigQuery client instance
        identifier: GitHub username or bot identifier (e.g., 'amazon-inspector-beta[bot]')
        start_date: Start datetime (timezone-aware)
        end_date: End datetime (timezone-aware)

    Returns:
        List of review event rows with PR information including merged_at and closed_at
    """
    print(f"\nπŸ” Querying BigQuery for reviews by {identifier}")
    print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")

    # Generate list of table names for each day in the range
    review_tables = []
    current_date = start_date
    while current_date < end_date:
        table_name = f"`githubarchive.day.{current_date.strftime('%Y%m%d')}`"
        review_tables.append(f"SELECT * FROM {table_name}")
        current_date += timedelta(days=1)
    review_union = " UNION ALL ".join(review_tables)

    # Generate status tables (lookback for PR status)
    status_start = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
    status_tables = []
    current_date = status_start
    while current_date < end_date:
        table_name = f"`githubarchive.day.{current_date.strftime('%Y%m%d')}`"
        status_tables.append(f"SELECT * FROM {table_name}")
        current_date += timedelta(days=1)
    status_union = " UNION ALL ".join(status_tables)

    # Build comprehensive query with CTEs for PR status
    query = f"""
    WITH review_events AS (
        SELECT
            JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
            COALESCE(
                JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at'),
                CAST(created_at AS STRING)
            ) as reviewed_at,
            actor.login as reviewer,
            created_at
        FROM (
            {review_union}
        )
        WHERE type = 'PullRequestReviewEvent'
        AND actor.login = @identifier
        AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
    ),
    pr_status AS (
        SELECT
            JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
            JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
            JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
            created_at
        FROM (
            {status_union}
        )
        WHERE type = 'PullRequestEvent'
        AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
        AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IN (
            SELECT DISTINCT url FROM review_events
        )
        QUALIFY ROW_NUMBER() OVER (PARTITION BY url ORDER BY created_at DESC) = 1
    )
    SELECT DISTINCT
        re.url,
        re.reviewed_at,
        re.created_at,
        ps.merged_at,
        ps.closed_at
    FROM review_events re
    LEFT JOIN pr_status ps ON re.url = ps.url
    ORDER BY re.reviewed_at DESC
    """

    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("identifier", "STRING", identifier)
        ]
    )

    print(f"   Querying {len(review_tables)} review tables and {len(status_tables)} status tables...")

    try:
        query_job = client.query(query, job_config=job_config)
        results = list(query_job.result())

        print(f"   βœ“ Found {len(results)} review events")
        return results

    except Exception as e:
        print(f"   βœ— BigQuery error: {str(e)}")
        return []


def extract_review_metadata_from_bigquery(review_row):
    """
    Extract minimal PR review metadata from BigQuery row.

    Args:
        review_row: BigQuery row from PullRequestReviewEvent query

    Returns:
        Dictionary with review metadata containing:
        - url: PR URL
        - reviewed_at: Review timestamp
        - merged_at: Merge timestamp (if merged, else None)
        - closed_at: Close timestamp (if closed, else None)
    """
    url = review_row.url
    reviewed_at = review_row.reviewed_at or review_row.created_at
    merged_at = getattr(review_row, 'merged_at', None)
    closed_at = getattr(review_row, 'closed_at', None)

    # Convert to ISO format if datetime and normalize
    if hasattr(reviewed_at, 'isoformat'):
        reviewed_at = reviewed_at.isoformat()
    reviewed_at = normalize_date_format(reviewed_at) if reviewed_at else None

    if merged_at and hasattr(merged_at, 'isoformat'):
        merged_at = merged_at.isoformat()
    merged_at = normalize_date_format(merged_at) if merged_at else None

    if closed_at and hasattr(closed_at, 'isoformat'):
        closed_at = closed_at.isoformat()
    closed_at = normalize_date_format(closed_at) if closed_at else None

    return {
        'url': url,
        'reviewed_at': reviewed_at,
        'merged_at': merged_at,
        'closed_at': closed_at
    }
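
# Example of the dictionary shape returned above (values are illustrative only):
#   {'url': 'https://github.com/owner/repo/pull/123',
#    'reviewed_at': '2025-10-15T23:23:47Z',
#    'merged_at': None,
#    'closed_at': '2025-10-16T01:02:03Z'}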


# =============================================================================
# GITHUB API OPERATIONS
# =============================================================================

def request_with_backoff(method, url, *, headers=None, params=None, json_body=None, data=None, max_retries=10, timeout=30, token_pool=None, token=None):
    """
    Perform an HTTP request with exponential backoff and jitter for GitHub API.
    Retries on 403/429 (rate limits), 5xx server errors, and transient network exceptions.

    Args:
        token_pool: Optional TokenPool instance for rate limit tracking
        token: Optional token string to mark as rate-limited if 403/429 occurs

    Returns the final requests.Response on success or non-retryable status, or None after exhausting retries.
    """
    delay = 1.0
    for attempt in range(max_retries):
        try:
            resp = requests.request(
                method,
                url,
                headers=headers or {},
                params=params,
                json=json_body,
                data=data,
                timeout=timeout
            )

            status = resp.status_code

            # Success
            if 200 <= status < 300:
                return resp

            # Rate limits or server errors -> retry with backoff
            if status in (403, 429) or 500 <= status < 600:
                wait = None
                reset_timestamp = None

                # Prefer Retry-After when present
                retry_after = resp.headers.get('Retry-After') or resp.headers.get('retry-after')
                if retry_after:
                    try:
                        wait = float(retry_after)
                    except Exception:
                        wait = None

                # Fallback to X-RateLimit-Reset when 403/429
                if wait is None and status in (403, 429):
                    reset_hdr = resp.headers.get('X-RateLimit-Reset') or resp.headers.get('x-ratelimit-reset')
                    if reset_hdr:
                        try:
                            reset_timestamp = int(float(reset_hdr))
                            wait = max(reset_timestamp - time.time() + 2, 1)
                        except Exception:
                            wait = None

                # Mark token as rate-limited if we have token pool and token
                if status in (403, 429) and token_pool and token:
                    token_pool.mark_rate_limited(token, reset_timestamp)

                # Final fallback: exponential backoff with jitter
                if wait is None:
                    wait = delay + random.uniform(0, 0.5)

                # Cap individual wait to avoid extreme sleeps
                wait = max(1.0, min(wait, 120.0))
                print(f"GitHub API {status}. Backing off {wait:.1f}s (attempt {attempt + 1}/{max_retries})...")
                time.sleep(wait)
                delay = min(delay * 2, 60.0)
                continue

            # Non-retryable error; return response for caller to handle
            return resp

        except requests.RequestException as e:
            # Network error -> retry with backoff
            wait = delay + random.uniform(0, 0.5)
            wait = max(1.0, min(wait, 60.0))
            print(f"Request error: {e}. Retrying in {wait:.1f}s (attempt {attempt + 1}/{max_retries})...")
            time.sleep(wait)
            delay = min(delay * 2, 60.0)

    print(f"Exceeded max retries for {url}")
    return None
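
# Illustrative usage (a sketch; /rate_limit is just a convenient read-only endpoint):
#   headers = {'Authorization': f'token {token}'} if token else {}
#   resp = request_with_backoff('GET', 'https://api.github.com/rate_limit', headers=headers)
#   if resp is not None and resp.status_code == 200:
#       remaining = resp.json()['resources']['core']['remaining']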

def get_github_tokens():
    """Get all GitHub tokens from environment variables (all vars starting with GITHUB_TOKEN)."""
    tokens = []
    for key, value in os.environ.items():
        if key.startswith('GITHUB_TOKEN') and value:
            tokens.append(value)

    if not tokens:
        print("Warning: No GITHUB_TOKEN found. API rate limits: 60/hour (authenticated: 5000/hour)")
    else:
        print(f"βœ“ Loaded {len(tokens)} GitHub token(s) for rotation")

    return tokens


def get_github_token():
    """Get first GitHub token from environment variables (backward compatibility)."""
    tokens = get_github_tokens()
    return tokens[0] if tokens else None


class TokenPool:
    """
    Hybrid token pool with parallel execution and round-robin fallback.

    Splits tokens into two pools:
    - Parallel pool (50%): For concurrent API calls to maximize throughput
    - Round-robin pool (50%): Backup pool for rate limit fallback

    Features:
    - Automatic fallback when parallel tokens hit rate limits
    - Rate limit tracking with timestamp-based recovery
    - Thread-safe token management
    - Real-time statistics monitoring
    """
    def __init__(self, tokens):
        self.all_tokens = tokens if tokens else [None]
        self.lock = threading.Lock()

        # Split tokens into parallel and round-robin pools (50/50)
        total_tokens = len(self.all_tokens)
        split_point = max(1, total_tokens // 2)

        self.parallel_tokens = self.all_tokens[:split_point]
        self.roundrobin_tokens = self.all_tokens[split_point:] if total_tokens > 1 else self.all_tokens

        # Round-robin index for fallback pool
        self.roundrobin_index = 0

        # Rate limit tracking: {token: reset_timestamp}
        self.parallel_rate_limited = set()
        self.roundrobin_rate_limited = set()
        self.rate_limit_resets = {}

        # Statistics
        self.stats = {
            'parallel_calls': 0,
            'roundrobin_calls': 0,
            'fallback_triggers': 0
        }

        print(f"πŸ“Š Token Pool Initialized:")
        print(f"   Total tokens: {total_tokens}")
        print(f"   Parallel pool: {len(self.parallel_tokens)} tokens")
        print(f"   Round-robin pool: {len(self.roundrobin_tokens)} tokens")

    def _cleanup_expired_rate_limits(self):
        """Remove tokens from rate-limited sets if their reset time has passed."""
        current_time = time.time()
        expired_tokens = [
            token for token, reset_time in self.rate_limit_resets.items()
            if current_time >= reset_time
        ]

        for token in expired_tokens:
            self.parallel_rate_limited.discard(token)
            self.roundrobin_rate_limited.discard(token)
            del self.rate_limit_resets[token]

        if expired_tokens:
            print(f"   ✓ Recovered {len(expired_tokens)} token(s) from rate limit")

    def get_parallel_token(self):
        """Get an available token from the parallel pool."""
        with self.lock:
            self._cleanup_expired_rate_limits()

            # Find first non-rate-limited parallel token
            for token in self.parallel_tokens:
                if token not in self.parallel_rate_limited:
                    self.stats['parallel_calls'] += 1
                    return token

            return None

    def get_roundrobin_token(self):
        """Get the next available token from round-robin pool."""
        with self.lock:
            self._cleanup_expired_rate_limits()

            # Try all tokens in round-robin order
            attempts = 0
            while attempts < len(self.roundrobin_tokens):
                token = self.roundrobin_tokens[self.roundrobin_index]
                self.roundrobin_index = (self.roundrobin_index + 1) % len(self.roundrobin_tokens)

                if token not in self.roundrobin_rate_limited:
                    self.stats['roundrobin_calls'] += 1
                    return token

                attempts += 1

            return None

    def get_next_token(self):
        """
        Get next available token, trying parallel pool first, then falling back to round-robin.

        Returns:
            Token string or None if all tokens are rate-limited
        """
        # Try parallel pool first
        token = self.get_parallel_token()
        if token:
            return token

        # Fallback to round-robin pool
        with self.lock:
            self.stats['fallback_triggers'] += 1

        token = self.get_roundrobin_token()
        if not token:
            print("   ⚠️ All tokens are rate-limited, waiting...")

        return token

    def get_headers(self):
        """Get headers with the next available token."""
        token = self.get_next_token()
        return {'Authorization': f'token {token}'} if token else {}

    def mark_rate_limited(self, token, reset_timestamp=None):
        """
        Mark a token as rate-limited with optional reset timestamp.

        Args:
            token: The token to mark as rate-limited
            reset_timestamp: Unix timestamp when rate limit resets (optional)
        """
        if not token:
            return

        with self.lock:
            # Determine which pool the token belongs to
            if token in self.parallel_tokens:
                self.parallel_rate_limited.add(token)
            if token in self.roundrobin_tokens:
                self.roundrobin_rate_limited.add(token)

            # Store reset timestamp if provided
            if reset_timestamp:
                self.rate_limit_resets[token] = reset_timestamp
                reset_time = datetime.fromtimestamp(reset_timestamp, tz=timezone.utc)
                print(f"   ⏰ Token rate-limited until {reset_time.strftime('%H:%M:%S')} UTC")

    def get_available_parallel_tokens(self):
        """Get list of all available (non-rate-limited) parallel tokens."""
        with self.lock:
            self._cleanup_expired_rate_limits()
            return [t for t in self.parallel_tokens if t not in self.parallel_rate_limited]

    def get_stats(self):
        """Get token pool usage statistics."""
        with self.lock:
            return {
                'parallel_calls': self.stats['parallel_calls'],
                'roundrobin_calls': self.stats['roundrobin_calls'],
                'fallback_triggers': self.stats['fallback_triggers'],
                'parallel_rate_limited': len(self.parallel_rate_limited),
                'roundrobin_rate_limited': len(self.roundrobin_rate_limited)
            }

    def print_stats(self):
        """Print token pool usage statistics."""
        stats = self.get_stats()
        total_calls = stats['parallel_calls'] + stats['roundrobin_calls']

        print(f"\nπŸ“Š Token Pool Statistics:")
        print(f"   Total API calls: {total_calls}")
        if total_calls > 0:
            print(f"   Parallel calls: {stats['parallel_calls']} ({stats['parallel_calls']/total_calls*100:.1f}%)")
            print(f"   Round-robin calls: {stats['roundrobin_calls']} ({stats['roundrobin_calls']/total_calls*100:.1f}%)")
        print(f"   Fallback triggers: {stats['fallback_triggers']}")
        print(f"   Currently rate-limited: {stats['parallel_rate_limited']} parallel, {stats['roundrobin_rate_limited']} round-robin")


def validate_github_username(identifier):
    """Verify that a GitHub identifier exists with backoff-aware requests."""
    try:
        token = get_github_token()
        headers = {'Authorization': f'token {token}'} if token else {}
        url = f'https://api.github.com/users/{identifier}'
        response = request_with_backoff('GET', url, headers=headers, max_retries=1)
        if response is None:
            return False, "Validation error: network/rate limit exhausted"
        if response.status_code == 200:
            return True, "Username is valid"
        elif response.status_code == 404:
            return False, "GitHub identifier not found"
        else:
            return False, f"Validation error: HTTP {response.status_code}"
    except Exception as e:
        return False, f"Validation error: {str(e)}"


def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_url, depth=0):
    """
    Fetch reviews within a specific time range using time-based partitioning.
    Recursively splits the time range if hitting the 1000-result limit.
    Supports splitting by day, hour, minute, and second as needed.

    Args:
        depth: Current recursion depth (for tracking)

    Returns the number of reviews found in this time partition.
    """
    # Calculate time difference
    time_diff = end_date - start_date
    total_seconds = time_diff.total_seconds()

    # Determine granularity and format dates accordingly
    if total_seconds >= 86400:  # >= 1 day
        # Use day granularity (YYYY-MM-DD)
        start_str = start_date.strftime('%Y-%m-%d')
        end_str = end_date.strftime('%Y-%m-%d')
    elif total_seconds >= 3600:  # >= 1 hour but < 1 day
        # Use hour granularity (YYYY-MM-DDTHH:MM:SSZ)
        start_str = start_date.strftime('%Y-%m-%dT%H:00:00Z')
        end_str = end_date.strftime('%Y-%m-%dT%H:59:59Z')
    elif total_seconds >= 60:  # >= 1 minute but < 1 hour
        # Use minute granularity (YYYY-MM-DDTHH:MM:SSZ)
        start_str = start_date.strftime('%Y-%m-%dT%H:%M:00Z')
        end_str = end_date.strftime('%Y-%m-%dT%H:%M:59Z')
    else:  # < 1 minute
        # Use second granularity (YYYY-MM-DDTHH:MM:SSZ)
        start_str = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
        end_str = end_date.strftime('%Y-%m-%dT%H:%M:%SZ')

    # Add date range to query (use created for PR search)
    query = f'{base_query} created:{start_str}..{end_str}'

    indent = "  " + "  " * depth
    print(f"{indent}Searching range {start_str} to {end_str}...")

    page = 1
    per_page = 100
    total_in_partition = 0

    while True:
        url = 'https://api.github.com/search/issues'  # Use issues endpoint for PR search
        params = {
            'q': query,
            'per_page': per_page,
            'page': page,
            'sort': 'created',
            'order': 'asc'
        }
        token = token_pool.get_next_token()
        headers = {'Authorization': f'token {token}'} if token else {}

        try:
            response = request_with_backoff('GET', url, headers=headers, params=params, token_pool=token_pool, token=token)
            if response is None:
                print(f"{indent}  Error: retries exhausted for range {start_str} to {end_str}")
                return total_in_partition

            if response.status_code != 200:
                print(f"{indent}  Error: HTTP {response.status_code} for range {start_str} to {end_str}")
                return total_in_partition

            data = response.json()
            total_count = data.get('total_count', 0)
            items = data.get('items', [])

            if not items:
                break

            # Add PR reviews to global dict (keyed by PR URL)
            for pr in items:
                pr_url = pr.get('url')
                # Use the PR URL as the unique key (more reliable than the PR number alone)
                if pr_url and pr_url not in prs_by_url:
                    prs_by_url[pr_url] = pr
                    total_in_partition += 1

            # Check if we hit the 1000-result limit
            if total_count > 1000 and page == 10:
                print(f"{indent}  ⚠️ Hit 1000-result limit ({total_count} total). Splitting time range...")

                # Determine how to split based on time range duration
                if total_seconds < 2:  # Less than 2 seconds - can't split further
                    print(f"{indent}  ⚠️ Cannot split further (range < 2 seconds). Some results may be missing.")
                    break

                elif total_seconds < 120:  # Less than 2 minutes - split by seconds
                    # Split into 2-4 parts depending on range
                    num_splits = min(4, max(2, int(total_seconds / 30)))
                    split_duration = time_diff / num_splits
                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]

                    total_from_splits = 0
                    for i in range(num_splits):
                        split_start = split_dates[i]
                        split_end = split_dates[i + 1]
                        # Avoid overlapping ranges (add 1 second to start)
                        if i > 0:
                            split_start = split_start + timedelta(seconds=1)

                        count = fetch_reviews_with_time_partition(
                            base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                        )
                        total_from_splits += count

                    return total_from_splits

                elif total_seconds < 7200:  # Less than 2 hours - split by minutes
                    # Split into 2-4 parts
                    num_splits = min(4, max(2, int(total_seconds / 1800)))
                    split_duration = time_diff / num_splits
                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]

                    total_from_splits = 0
                    for i in range(num_splits):
                        split_start = split_dates[i]
                        split_end = split_dates[i + 1]
                        # Avoid overlapping ranges (add 1 minute to start)
                        if i > 0:
                            split_start = split_start + timedelta(minutes=1)

                        count = fetch_reviews_with_time_partition(
                            base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                        )
                        total_from_splits += count

                    return total_from_splits

                elif total_seconds < 172800:  # Less than 2 days - split by hours
                    # Split into 2-4 parts
                    num_splits = min(4, max(2, int(total_seconds / 43200)))
                    split_duration = time_diff / num_splits
                    split_dates = [start_date + split_duration * i for i in range(num_splits + 1)]

                    total_from_splits = 0
                    for i in range(num_splits):
                        split_start = split_dates[i]
                        split_end = split_dates[i + 1]
                        # Avoid overlapping ranges (add 1 hour to start)
                        if i > 0:
                            split_start = split_start + timedelta(hours=1)

                        count = fetch_reviews_with_time_partition(
                            base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                        )
                        total_from_splits += count

                    return total_from_splits

                else:  # 2+ days - split by days
                    days_diff = time_diff.days

                    # Use aggressive splitting for large ranges or deep recursion
                    # Split into 4 parts if range is > 30 days, otherwise split in half
                    if days_diff > 30 or depth > 5:
                        # Split into 4 parts for more aggressive partitioning
                        quarter_diff = time_diff / 4
                        split_dates = [
                            start_date,
                            start_date + quarter_diff,
                            start_date + quarter_diff * 2,
                            start_date + quarter_diff * 3,
                            end_date
                        ]

                        total_from_splits = 0
                        for i in range(4):
                            split_start = split_dates[i]
                            split_end = split_dates[i + 1]
                            # Avoid overlapping ranges
                            if i > 0:
                                split_start = split_start + timedelta(days=1)

                            count = fetch_reviews_with_time_partition(
                                base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                            )
                            total_from_splits += count

                        return total_from_splits
                    else:
                        # Binary split for smaller ranges
                        mid_date = start_date + time_diff / 2

                        # Recursively fetch both halves
                        count1 = fetch_reviews_with_time_partition(
                            base_query, start_date, mid_date, token_pool, prs_by_url, depth + 1
                        )
                        count2 = fetch_reviews_with_time_partition(
                            base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_url, depth + 1
                        )

                        return count1 + count2

            # Normal pagination: check if there are more pages
            if len(items) < per_page or page >= 10:
                break

            page += 1
            time.sleep(0.5)  # Courtesy delay between pages

        except Exception as e:
            print(f"{indent}  Error fetching range {start_str} to {end_str}: {str(e)}")
            return total_in_partition

    if total_in_partition > 0:
        print(f"{indent}  βœ“ Found {total_in_partition} reviews in range {start_str} to {end_str}")

    return total_in_partition
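
# Illustrative call to the time-partition fetcher above. The query pattern and
# dates are hypothetical; token_pool and prs_by_url follow the shapes the function
# expects (a token pool object and a dict keyed by PR URL):
#
#   prs_by_url = {}
#   found = fetch_reviews_with_time_partition(
#       'is:pr reviewed-by:example-bot',
#       datetime(2025, 1, 1, tzinfo=timezone.utc),
#       datetime(2025, 4, 1, tzinfo=timezone.utc),
#       token_pool, prs_by_url, depth=0
#   )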


def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs_by_url):
    """
    Fetch reviews for multiple query patterns in parallel using available parallel tokens.

    This function uses ThreadPoolExecutor to execute multiple query patterns concurrently,
    with each pattern using a dedicated token from the parallel pool. Falls back to
    sequential execution if insufficient parallel tokens are available.

    Args:
        query_patterns: List of query pattern strings (e.g., ['is:pr author:bot1', 'is:pr reviewed-by:bot1'])
        start_date: Start datetime for time range
        end_date: End datetime for time range
        token_pool: TokenPool instance for token management
        prs_by_url: Dictionary to collect PRs by URL (shared across patterns)

    Returns:
        Total number of PRs found across all patterns
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed
    import threading

    # Check how many parallel tokens are available
    available_tokens = token_pool.get_available_parallel_tokens()

    if len(available_tokens) < 2 or len(query_patterns) < 2:
        # Not enough tokens or patterns for parallelization, use sequential
        print(f"   ⚠️ Sequential execution: {len(available_tokens)} parallel tokens available for {len(query_patterns)} patterns")
        total_found = 0
        for pattern in query_patterns:
            pattern_prs = {}
            count = fetch_reviews_with_time_partition(
                pattern, start_date, end_date, token_pool, pattern_prs, depth=0
            )
            # Merge pattern results into the shared dict.
            # No lock is needed here: this branch runs sequentially, and a freshly
            # created threading.Lock() would not guard anything shared anyway.
            for url, pr in pattern_prs.items():
                if url not in prs_by_url:
                    prs_by_url[url] = pr
            total_found += count
        return total_found

    # Use parallel execution
    print(f"   πŸš€ Parallel execution: {len(available_tokens)} parallel tokens for {len(query_patterns)} patterns")

    # Thread-safe lock for updating prs_by_url
    lock = threading.Lock()

    def fetch_pattern(pattern):
        """Fetch reviews for a single pattern (runs in parallel)."""
        pattern_prs = {}
        try:
            count = fetch_reviews_with_time_partition(
                pattern, start_date, end_date, token_pool, pattern_prs, depth=0
            )
            return pattern, pattern_prs, count
        except Exception as e:
            print(f"   Error fetching pattern '{pattern}': {str(e)}")
            return pattern, {}, 0

    # Execute patterns in parallel
    max_workers = min(len(query_patterns), len(available_tokens))
    total_found = 0

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all patterns
        future_to_pattern = {
            executor.submit(fetch_pattern, pattern): pattern
            for pattern in query_patterns
        }

        # Collect results as they complete
        for future in as_completed(future_to_pattern):
            pattern = future_to_pattern[future]
            try:
                _, pattern_prs, count = future.result()

                # Merge results into global dict (thread-safe)
                with lock:
                    for url, pr in pattern_prs.items():
                        if url not in prs_by_url:
                            prs_by_url[url] = pr

                total_found += count
                print(f"   βœ“ Pattern '{pattern}' completed: {count} PRs found")

            except Exception as e:
                print(f"   βœ— Pattern '{pattern}' failed: {str(e)}")

    return total_found
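
# Illustrative call to fetch_reviews_parallel above; the bot name is hypothetical
# and the two query patterns mirror the example in the docstring. Results from all
# patterns are merged (deduplicated by URL) into prs_by_url.
#
#   prs_by_url = {}
#   total = fetch_reviews_parallel(
#       ['is:pr author:example-bot', 'is:pr reviewed-by:example-bot'],
#       start_date, end_date, token_pool, prs_by_url
#   )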


def extract_review_metadata(pr):
    """
    Extract minimal PR review metadata for efficient storage.
    Only keeps essential fields: url, reviewed_at, merged_at, closed_at.
    Note: agent_name is not stored as it's inferred from the folder structure.

    Status can be derived from the timestamps:
    - merged_at: Timestamp if PR was merged, None otherwise
    - closed_at: Timestamp if PR was closed (either merged or just closed), None otherwise

    Merged PR = PR that was merged (merged_at is not None)
    Rejected PR = PR that was closed without merging (closed_at is not None but merged_at is None)
    Open PR = PR still open (both merged_at and closed_at are None)
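
    Example return value (illustrative values):
        {
            'url': 'https://github.com/example-org/example-repo/pull/42',
            'reviewed_at': '2025-01-15T10:00:00Z',
            'merged_at': None,
            'closed_at': '2025-01-20T12:00:00Z'   # closed without merging -> rejected
        }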
    """
    # Extract PR metadata from search results
    # The GitHub search API returns PR data from /search/issues endpoint
    url = pr.get('url')
    created_at = pr.get('created_at')
    closed_at = pr.get('closed_at')

    # Check if PR has pull_request field (indicates it's a PR, not an issue)
    pull_request_data = pr.get('pull_request', {})
    merged_at = pull_request_data.get('merged_at') if pull_request_data else None

    return {
        'url': url,
        'reviewed_at': created_at,  # When the PR was created (agent reviewed it)
        'merged_at': merged_at,
        'closed_at': closed_at
    }


def get_pr_status_from_metadata(review_meta):
    """
    Derive PR status from merged_at and closed_at fields.

    Args:
        review_meta: Dictionary containing merged_at and closed_at fields

    Returns:
        str: 'merged', 'closed', or 'open'
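
    Example (illustrative):
        >>> get_pr_status_from_metadata({'merged_at': None, 'closed_at': '2025-01-20T12:00:00Z'})
        'closed'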
    """
    merged_at = review_meta.get('merged_at')
    closed_at = review_meta.get('closed_at')

    # If merged_at is set (not None and not False), PR is merged
    if merged_at:
        return 'merged'
    # If closed_at is set but not merged, PR is closed without merging
    elif closed_at:
        return 'closed'
    # Otherwise, PR is still open
    else:
        return 'open'


def calculate_review_stats_from_metadata(metadata_list):
    """
    Calculate statistics from a list of review metadata (lightweight objects).
    Works with minimal metadata: url, reviewed_at, merged_at, closed_at.

    Returns a dictionary with comprehensive review metrics.

    Acceptance Rate is calculated as:
        merged PRs / (merged PRs + rejected PRs) * 100

    Merged PRs = PRs that were merged (merged_at is not None)
    Rejected PRs = PRs that were closed without merging (closed_at is not None but merged_at is None)
    Pending PRs = PRs still open (both merged_at and closed_at are None) - excluded from acceptance rate
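
    Worked example (illustrative numbers): with 8 merged, 2 rejected, and 5 pending
    PRs, total_reviews = 15 and acceptance_rate = 8 / (8 + 2) * 100 = 80.0.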
    """
    total_reviews = len(metadata_list)

    # Count merged PRs (merged_at is set)
    merged_prs = sum(1 for review_meta in metadata_list
                      if get_pr_status_from_metadata(review_meta) == 'merged')

    # Count rejected PRs (closed without merging)
    rejected_prs = sum(1 for review_meta in metadata_list
                      if get_pr_status_from_metadata(review_meta) == 'closed')

    # Count pending PRs (still open)
    pending_prs = sum(1 for review_meta in metadata_list
                     if get_pr_status_from_metadata(review_meta) == 'open')

    # Calculate acceptance rate (exclude pending PRs)
    completed_prs = merged_prs + rejected_prs
    acceptance_rate = (merged_prs / completed_prs * 100) if completed_prs > 0 else 0

    return {
        'total_reviews': total_reviews,
        'merged_prs': merged_prs,
        'rejected_prs': rejected_prs,
        'pending_prs': pending_prs,
        'acceptance_rate': round(acceptance_rate, 2),
    }


def calculate_monthly_metrics_by_agent(top_n=None):
    """
    Calculate monthly metrics for all agents (or top N agents) for visualization.
    Loads data directly from SWE-Arena/review_metadata dataset.

    Args:
        top_n: If specified, only return metrics for the top N agents by total reviews.
               Agents are ranked by their total review count across all months.

    Returns:
        dict: {
            'agents': list of agent names,
            'months': list of month labels (e.g., '2025-01'),
            'data': {
                agent_name: {
                    'acceptance_rates': list of acceptance rates by month,
                    'total_reviews': list of review counts by month,
                    'merged_prs': list of merged PR counts by month,
                }
            }
        }
    """
    # Load ALL agents from HuggingFace agents repo
    agents = load_agents_from_hf()

    # Create mapping from agent_identifier to agent_name
    identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}

    # Load all review metadata from review_metadata dataset
    all_metadata = load_review_metadata()

    if not all_metadata:
        return {'agents': [], 'months': [], 'data': {}}

    # Group by agent and month
    agent_month_data = defaultdict(lambda: defaultdict(list))

    for review_meta in all_metadata:
        agent_identifier = review_meta.get('agent_identifier')
        reviewed_at = review_meta.get('reviewed_at')

        if not agent_identifier or not reviewed_at:
            continue

        # Get agent_name from identifier
        agent_name = identifier_to_name.get(agent_identifier, agent_identifier)

        try:
            dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
            month_key = f"{dt.year}-{dt.month:02d}"
            agent_month_data[agent_name][month_key].append(review_meta)
        except Exception as e:
            print(f"Warning: Could not parse date '{reviewed_at}': {e}")
            continue

    # Get all unique months and sort them
    all_months = set()
    for agent_data in agent_month_data.values():
        all_months.update(agent_data.keys())
    months = sorted(list(all_months))

    # Calculate metrics for each agent and month
    result_data = {}
    for agent_name, month_dict in agent_month_data.items():
        acceptance_rates = []
        total_reviews_list = []
        merged_prs_list = []

        for month in months:
            reviews_in_month = month_dict.get(month, [])

            # Count merged PRs (merged_at is set)
            merged_count = sum(1 for review in reviews_in_month
                                if get_pr_status_from_metadata(review) == 'merged')

            # Count rejected PRs (closed without merging)
            rejected_count = sum(1 for review in reviews_in_month
                                if get_pr_status_from_metadata(review) == 'closed')

            # Total reviews created in this month
            total_count = len(reviews_in_month)

            # Calculate acceptance rate (exclude pending PRs)
            completed_count = merged_count + rejected_count
            acceptance_rate = (merged_count / completed_count * 100) if completed_count > 0 else None

            acceptance_rates.append(acceptance_rate)
            total_reviews_list.append(total_count)
            merged_prs_list.append(merged_count)

        result_data[agent_name] = {
            'acceptance_rates': acceptance_rates,
            'total_reviews': total_reviews_list,
            'merged_prs': merged_prs_list,
        }

    # Filter to top N agents if specified
    agents_list = sorted(list(agent_month_data.keys()))
    if top_n is not None and top_n > 0:
        # Calculate total reviews for each agent across all months
        agent_totals = []
        for agent_name in agents_list:
            total_reviews = sum(result_data[agent_name]['total_reviews'])
            agent_totals.append((agent_name, total_reviews))

        # Sort by total reviews (descending) and take top N
        agent_totals.sort(key=lambda x: x[1], reverse=True)
        top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]

        # Filter result_data to only include top agents
        result_data = {agent: result_data[agent] for agent in top_agents if agent in result_data}
        agents_list = top_agents

    return {
        'agents': agents_list,
        'months': months,
        'data': result_data
    }


# =============================================================================
# REVIEW METADATA STORAGE & RETRIEVAL
# =============================================================================

def group_metadata_by_date(metadata_list):
    """
    Group review metadata by exact date (year.month.day) for efficient daily storage.
    Returns dict: {(year, month, day): [metadata_list]}
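
    Example (illustrative): a review with reviewed_at='2025-01-15T10:00:00Z' is
    grouped under the key (2025, 1, 15).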
    """
    grouped = defaultdict(list)

    for review_meta in metadata_list:
        reviewed_at = review_meta.get('reviewed_at')
        if not reviewed_at:
            continue

        try:
            dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
            key = (dt.year, dt.month, dt.day)
            grouped[key].append(review_meta)
        except Exception as e:
            print(f"Warning: Could not parse date '{reviewed_at}': {e}")

    return dict(grouped)


def save_review_metadata_to_hf(metadata_list, agent_identifier):
    """
    Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
    Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.

    This function APPENDS new metadata and DEDUPLICATES by URL.
    Uses batch upload to avoid rate limit (uploads entire folder in single commit).

    Args:
        metadata_list: List of review metadata dictionaries
        agent_identifier: GitHub identifier of the agent (used as folder name)
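
    Example layout (illustrative; 'claude[bot]' is one of the agent identifiers used in this codebase):
        claude[bot]/2025.01.15.jsonl   # one JSONL file per agent per day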
    """
    import tempfile
    import shutil

    try:
        token = get_hf_token()
        if not token:
            raise Exception("No HuggingFace token found")

        api = HfApi()

        # Group by exact date (year, month, day)
        grouped = group_metadata_by_date(metadata_list)

        # Create a temporary directory for batch upload
        temp_dir = tempfile.mkdtemp()
        agent_folder = os.path.join(temp_dir, agent_identifier)
        os.makedirs(agent_folder, exist_ok=True)

        try:
            print(f"πŸ“¦ Preparing batch upload for {len(grouped)} daily files...")

            # Process each daily file
            for (review_year, month, day), day_metadata in grouped.items():
                filename = f"{agent_identifier}/{review_year}.{month:02d}.{day:02d}.jsonl"
                local_filename = os.path.join(agent_folder, f"{review_year}.{month:02d}.{day:02d}.jsonl")

                # Download existing file if it exists
                existing_metadata = []
                try:
                    file_path = hf_hub_download(
                        repo_id=REVIEW_METADATA_REPO,
                        filename=filename,
                        repo_type="dataset",
                        token=token
                    )
                    existing_metadata = load_jsonl(file_path)
                    print(f"   Found {len(existing_metadata)} existing reviews in {filename}")
                except Exception:
                    print(f"   Creating new file: {filename}")

                # Merge and deduplicate by URL
                existing_by_url = {meta['url']: meta for meta in existing_metadata if meta.get('url')}
                new_by_url = {meta['url']: meta for meta in day_metadata if meta.get('url')}

                # Update with new data (new data overwrites old)
                existing_by_url.update(new_by_url)
                merged_metadata = list(existing_by_url.values())

                # Save to temp directory
                save_jsonl(local_filename, merged_metadata)
                print(f"   Prepared {len(merged_metadata)} reviews for {filename}")

            # Upload entire folder using upload_large_folder (optimized for large files)
            # Note: upload_large_folder creates multiple commits automatically and doesn't support custom commit_message
            print(f"πŸ“€ Uploading {len(grouped)} files...")
            upload_large_folder_with_backoff(
                api=api,
                folder_path=temp_dir,
                repo_id=REVIEW_METADATA_REPO,
                repo_type="dataset"
            )
            print(f"   βœ“ Batch upload complete for {agent_identifier}")

            return True

        finally:
            # Always clean up temp directory
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)

    except Exception as e:
        print(f"βœ— Error saving review metadata: {str(e)}")
        import traceback
        traceback.print_exc()
        return False


def load_review_metadata():
    """
    Load review metadata from the last LEADERBOARD_TIME_FRAME_DAYS.

    Structure: [agent_identifier]/YYYY.MM.DD.jsonl

    Returns:
        List of dictionaries with 'agent_identifier' added to each review metadata.
        Only includes reviews from the last LEADERBOARD_TIME_FRAME_DAYS.
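
        Illustrative item shape (values hypothetical):
            {'url': 'https://github.com/example-org/example-repo/pull/42',
             'reviewed_at': '2025-01-15T10:00:00Z', 'merged_at': None,
             'closed_at': None, 'agent_identifier': 'claude[bot]'}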
    """
    # Calculate cutoff date based on LEADERBOARD_TIME_FRAME_DAYS
    current_time = datetime.now(timezone.utc)
    cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)

    try:
        api = HfApi()
        token = get_hf_token()

        # List all files in the repository
        files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")

        # Filter for files matching the pattern: [agent_identifier]/YYYY.MM.DD.jsonl
        # AND within the time frame (parse date from filename)
        time_frame_files = []
        for f in files:
            if f.endswith('.jsonl'):
                parts = f.split('/')
                if len(parts) == 2:  # [agent_identifier]/YYYY.MM.DD.jsonl
                    filename = parts[1]
                    # Parse date from filename: YYYY.MM.DD.jsonl
                    try:
                        date_part = filename.replace('.jsonl', '')  # Get YYYY.MM.DD
                        date_components = date_part.split('.')
                        if len(date_components) == 3:
                            file_year, file_month, file_day = map(int, date_components)
                            file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)

                            # Only include files within the time frame
                            if file_date >= cutoff_date:
                                time_frame_files.append(f)
                    except Exception:
                        # If we can't parse the date, skip this file
                        continue

        print(f"πŸ“₯ Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files across all agents)...")

        all_metadata = []
        agent_identifiers_found = set()

        for filename in time_frame_files:
            try:
                # Extract agent_identifier from path (first part)
                # Format: agent_identifier/YYYY.MM.DD.jsonl
                parts = filename.split('/')
                if len(parts) != 2:
                    print(f"   Warning: Unexpected filename format: {filename}")
                    continue

                agent_identifier = parts[0]
                agent_identifiers_found.add(agent_identifier)

                file_path = hf_hub_download_with_backoff(
                    repo_id=REVIEW_METADATA_REPO,
                    filename=filename,
                    repo_type="dataset",
                    token=token
                )
                day_metadata = load_jsonl(file_path)

                # Add agent_identifier and filter by time frame (double-check)
                filtered_count = 0
                for review_meta in day_metadata:
                    # Validate review date is within time frame
                    reviewed_at = review_meta.get('reviewed_at')
                    if reviewed_at:
                        try:
                            dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
                            if dt < cutoff_date:
                                continue  # Skip reviews older than time frame
                        except Exception:
                            pass  # Keep reviews with unparseable dates

                    review_meta['agent_identifier'] = agent_identifier
                    all_metadata.append(review_meta)
                    filtered_count += 1

                print(f"   βœ“ Loaded {filtered_count} reviews from {filename}")
            except Exception as e:
                print(f"   Warning: Could not load {filename}: {str(e)}")

        print(f"βœ“ Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")

        return all_metadata

    except Exception as e:
        print(f"βœ— Error loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days: {str(e)}")
        return []


def get_latest_review_date_for_agent(agent_identifier):
    """
    Get the latest review creation date for an agent from stored metadata.
    Used for incremental updates - only fetch reviews newer than this date.

    Structure: [agent_identifier]/YYYY.MM.DD.jsonl

    Args:
        agent_identifier: GitHub identifier of the agent

    Returns:
        datetime or None if no existing reviews found.
    """
    try:
        api = HfApi()
        token = get_hf_token()

        # List all files in the repository
        files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")

        # Filter for files in this agent's folder
        # New structure: [agent_identifier]/YYYY.MM.DD.jsonl
        agent_pattern = f"{agent_identifier}/"
        agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]

        if not agent_files:
            return None

        # Find latest created_at across all files
        latest_date = None
        for filename in agent_files:
            try:
                file_path = hf_hub_download_with_backoff(
                    repo_id=REVIEW_METADATA_REPO,
                    filename=filename,
                    repo_type="dataset",
                    token=token
                )
                metadata = load_jsonl(file_path)

                for review_meta in metadata:
                    reviewed_at = review_meta.get("reviewed_at")
                    if reviewed_at:
                        try:
                            dt = datetime.fromisoformat(reviewed_at.replace("Z", "+00:00"))
                            if latest_date is None or dt > latest_date:
                                latest_date = dt
                        except Exception:
                            continue
            except Exception:
                continue

        return latest_date

    except Exception:
        return None


def get_daily_files_last_time_frame(agent_identifier):
    """
    Get list of daily file paths for an agent from the configured time frame.

    Args:
        agent_identifier: GitHub identifier of the agent

    Returns:
        List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
    """
    try:
        api = HfApi()
        token = get_hf_token()

        # Calculate date range using configured time frame
        today = datetime.now(timezone.utc)
        cutoff_date = today - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)

        # List all files in the repository
        files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")

        # Filter for files in this agent's folder
        agent_pattern = f"{agent_identifier}/"
        agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]

        # Filter by date range (extract date from filename)
        recent_files = []
        for filename in agent_files:
            try:
                # Extract date from filename: YYYY.MM.DD.jsonl
                parts = filename.split('/')
                if len(parts) != 2:
                    continue

                date_part = parts[1].replace('.jsonl', '')  # Get YYYY.MM.DD
                date_components = date_part.split('.')
                if len(date_components) != 3:
                    continue

                file_year, file_month, file_day = map(int, date_components)
                file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)

                # Include if within configured time frame
                if cutoff_date <= file_date <= today:
                    recent_files.append(filename)
            except Exception:
                continue

        return recent_files

    except Exception as e:
        print(f"Error getting daily files: {str(e)}")
        return []


def fetch_review_current_status(review_url, token):
    """
    Fetch the current status of a single review from the GitHub API.

    Args:
        review_url: HTML URL of the review on GitHub
        token: GitHub API token

    Returns:
        Dictionary with state, state_reason, and closed_at, or None if the fetch failed
    """
    try:
        # Convert HTML URL to API URL
        # https://github.com/owner/repo/reviews/123 -> https://api.github.com/repos/owner/repo/reviews/123
        parts = review_url.replace('https://github.com/', '').split('/')
        if len(parts) < 4:
            return None

        owner, repo, review_word, review_number = parts[0], parts[1], parts[2], parts[3]
        api_url = f'https://api.github.com/repos/{owner}/{repo}/reviews/{review_number}'

        headers = {'Authorization': f'token {token}'} if token else {}
        response = request_with_backoff('GET', api_url, headers=headers, max_retries=3)

        if response is None or response.status_code != 200:
            return None

        review_data = response.json()
        state = review_data.get('state')
        state_reason = review_data.get('state_reason')
        closed_at = review_data.get('closed_at')

        return {
            'state': state,
            'state_reason': state_reason,
            'closed_at': closed_at
        }

    except Exception as e:
        print(f"   Error fetching review status for {review_url}: {str(e)}")
        return None


def refresh_review_status_for_agent(agent_identifier, token):
    """
    Refresh status for an agent's reviews within the configured time frame.
    Only re-checks reviews that have not already been marked as reverted.

    This implements the smart update strategy:
    - Skip reviews that are already closed/resolved
    - Fetch current status for open reviews
    - Update and save back to daily files

    Args:
        agent_identifier: GitHub identifier of the agent
        token: GitHub API token

    Returns:
        Tuple: (total_checked, updated_count)
    """
    print(f"\nπŸ”„ Refreshing open reviews for {agent_identifier} (last {LEADERBOARD_TIME_FRAME_DAYS} days)...")

    try:
        # Get daily files from configured time frame
        recent_files = get_daily_files_last_time_frame(agent_identifier)

        if not recent_files:
            print(f"   No recent files found for {agent_identifier}")
            return (0, 0)

        print(f"   Found {len(recent_files)} daily files to check")

        total_checked = 0
        updated_count = 0

        # Process each file
        for filename in recent_files:
            try:
                # Download file
                file_path = hf_hub_download(
                    repo_id=REVIEW_METADATA_REPO,
                    filename=filename,
                    repo_type="dataset",
                    token=get_hf_token()
                )
                reviews = load_jsonl(file_path)

                if not reviews:
                    continue

                updated_reviews = []
                file_had_updates = False

                # Check each review
                for review in reviews:
                    # Skip if already marked as reverted
                    if review.get("is_reverted"):
                        updated_reviews.append(review)
                        continue

                    # Review may have been reverted; check its current status
                    review_url = review.get("url")

                    if not review_url:
                        updated_reviews.append(review)
                        continue

                    total_checked += 1
                    current_status = fetch_review_current_status(review_url, token)

                    if current_status:
                        # Check if status changed (now closed)
                        if current_status['state'] == 'closed':
                            print(f"   βœ“ Review status changed: {review_url}")
                            review['state'] = current_status['state']
                            review['state_reason'] = current_status['state_reason']
                            review['closed_at'] = current_status['closed_at']
                            updated_count += 1
                            file_had_updates = True

                    updated_reviews.append(review)
                    time.sleep(0.1)  # Rate limiting courtesy delay

                # Save file if there were updates
                if file_had_updates:
                    # Extract filename components for local save
                    parts = filename.split('/')
                    local_filename = parts[-1]  # Just YYYY.MM.DD.jsonl

                    # Save locally
                    save_jsonl(local_filename, updated_reviews)

                    try:
                        # Upload back to HuggingFace
                        api = HfApi()
                        upload_with_retry(
                            api=api,
                            path_or_fileobj=local_filename,
                            path_in_repo=filename,
                            repo_id=REVIEW_METADATA_REPO,
                            repo_type="dataset",
                            token=get_hf_token()
                        )
                        print(f"   πŸ’Ύ Updated {filename}")
                    finally:
                        # Always clean up local file, even if upload fails
                        if os.path.exists(local_filename):
                            os.remove(local_filename)

            except Exception as e:
                print(f"   Warning: Could not process {filename}: {str(e)}")
                continue

        print(f"   βœ… Refresh complete: {total_checked} open reviews checked, {updated_count} updated")
        return (total_checked, updated_count)

    except Exception as e:
        print(f"   βœ— Error refreshing reviews for {agent_identifier}: {str(e)}")
        return (0, 0)


# =============================================================================
# HUGGINGFACE DATASET OPERATIONS
# =============================================================================

def load_agents_from_hf():
    """Load all agent metadata JSON files from HuggingFace dataset."""
    try:
        api = HfApi()
        agents = []

        # List all files in the repository
        files = list_repo_files_with_backoff(api=api, repo_id=AGENTS_REPO, repo_type="dataset")

        # Filter for JSON files only
        json_files = [f for f in files if f.endswith('.json')]

        # Download and parse each JSON file
        for json_file in json_files:
            try:
                file_path = hf_hub_download_with_backoff(
                    repo_id=AGENTS_REPO,
                    filename=json_file,
                    repo_type="dataset"
                )

                with open(file_path, 'r') as f:
                    agent_data = json.load(f)

                    # Only process agents with status == "public"
                    if agent_data.get('status') != 'public':
                        print(f"Skipping {json_file}: status is not 'public'")
                        continue

                    # Extract github_identifier from filename (e.g., "claude[bot].json" -> "claude[bot]")
                    filename_identifier = json_file.replace('.json', '')

                    # Add or override github_identifier to match filename
                    agent_data['github_identifier'] = filename_identifier

                    agents.append(agent_data)

            except Exception as e:
                print(f"Warning: Could not load {json_file}: {str(e)}")
                continue

        print(f"βœ“ Loaded {len(agents)} agents from HuggingFace")
        return agents

    except Exception as e:
        print(f"Could not load agents from HuggingFace: {str(e)}")
        return None


def get_hf_token():
    """Get HuggingFace token from environment variables."""
    token = os.getenv('HF_TOKEN')
    if not token:
        print("Warning: HF_TOKEN not found in environment variables")
    return token


def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, token, max_retries=5):
    """
    Upload file to HuggingFace with exponential backoff retry logic.

    Args:
        api: HfApi instance
        path_or_fileobj: Local file path to upload
        path_in_repo: Target path in the repository
        repo_id: Repository ID
        repo_type: Type of repository (e.g., "dataset")
        token: HuggingFace token
        max_retries: Maximum number of retry attempts

    Returns:
        True if upload succeeded, raises exception if all retries failed
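
    Backoff schedule (derived from the code below): the delay starts at 2 seconds,
    roughly doubles after each failed attempt (capped at 60s), plus up to 1 second
    of random jitter before each retry.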
    """
    delay = 2.0  # Initial delay in seconds

    for attempt in range(max_retries):
        try:
            api.upload_file(
                path_or_fileobj=path_or_fileobj,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                token=token
            )
            if attempt > 0:
                print(f"   βœ“ Upload succeeded on attempt {attempt + 1}/{max_retries}")
            return True

        except Exception as e:
            if attempt < max_retries - 1:
                wait_time = delay + random.uniform(0, 1.0)
                print(f"   ⚠️ Upload failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
                print(f"   ⏳ Retrying in {wait_time:.1f} seconds...")
                time.sleep(wait_time)
                delay = min(delay * 2, 60.0)  # Exponential backoff, max 60s
            else:
                print(f"   βœ— Upload failed after {max_retries} attempts: {str(e)}")
                raise


def save_agent_to_hf(data):
    """Save a new agent to HuggingFace dataset as {identifier}.json in root."""
    try:
        api = HfApi()
        token = get_hf_token()

        if not token:
            raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")

        identifier = data['github_identifier']
        filename = f"{identifier}.json"

        # Save locally first
        with open(filename, 'w') as f:
            json.dump(data, f, indent=2)

        try:
            # Upload to HuggingFace (root directory)
            upload_with_retry(
                api=api,
                path_or_fileobj=filename,
                path_in_repo=filename,
                repo_id=AGENTS_REPO,
                repo_type="dataset",
                token=token
            )
            print(f"βœ“ Saved agent to HuggingFace: {filename}")
            return True
        finally:
            # Always clean up local file, even if upload fails
            if os.path.exists(filename):
                os.remove(filename)

    except Exception as e:
        print(f"βœ— Error saving agent: {str(e)}")
        return False


def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
    """
    Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.

    Args:
        leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
        monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        api = HfApi()
        token = get_hf_token()

        if not token:
            raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")

        filename = "swe-review.json"

        # Combine leaderboard and monthly metrics
        combined_data = {
            'last_updated': datetime.now(timezone.utc).isoformat(),
            'leaderboard': leaderboard_dict,
            'monthly_metrics': monthly_metrics,
            'metadata': {
                'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS,
                'update_time_frame_days': UPDATE_TIME_FRAME_DAYS
            }
        }

        # Save locally first
        with open(filename, 'w') as f:
            json.dump(combined_data, f, indent=2)

        try:
            # Upload to HuggingFace
            upload_with_retry(
                api=api,
                path_or_fileobj=filename,
                path_in_repo=filename,
                repo_id=LEADERBOARD_REPO,
                repo_type="dataset",
                token=token
            )
            print(f"βœ“ Saved leaderboard data to HuggingFace: {filename}")
            return True
        finally:
            # Always clean up local file, even if upload fails
            if os.path.exists(filename):
                os.remove(filename)

    except Exception as e:
        print(f"βœ— Error saving leaderboard data: {str(e)}")
        import traceback
        traceback.print_exc()
        return False


def load_leaderboard_data_from_hf():
    """
    Load leaderboard data and monthly metrics from HuggingFace dataset.

    Returns:
        dict: Dictionary with 'leaderboard', 'monthly_metrics', and 'last_updated' keys
              Returns None if file doesn't exist or error occurs
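
        Illustrative shape (as written by save_leaderboard_data_to_hf; the
        save_leaderboard_and_metrics_to_hf variant nests last_updated under 'metadata'):
            {
                'last_updated': '2025-01-15T00:00:00+00:00',
                'leaderboard': {...},
                'monthly_metrics': {...},
                'metadata': {'leaderboard_time_frame_days': ..., 'update_time_frame_days': ...}
            }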
    """
    try:
        token = get_hf_token()
        filename = "swe-review.json"

        # Download file
        file_path = hf_hub_download_with_backoff(
            repo_id=LEADERBOARD_REPO,
            filename=filename,
            repo_type="dataset",
            token=token
        )

        # Load JSON data
        with open(file_path, 'r') as f:
            data = json.load(f)

        last_updated = data.get('last_updated', 'Unknown')
        print(f"βœ“ Loaded leaderboard data from HuggingFace (last updated: {last_updated})")

        return data

    except Exception as e:
        print(f"⚠️ Could not load leaderboard data from HuggingFace: {str(e)}")
        return None


def save_leaderboard_and_metrics_to_hf():
    """
    Creates a comprehensive JSON file with both leaderboard stats and monthly metrics.
    If the file exists, it will be overwritten.

    Returns:
        bool: True if successful, False otherwise
    """
    import io

    try:
        token = get_hf_token()
        if not token:
            raise Exception("No HuggingFace token found")

        api = HfApi(token=token)

        print(f"\n{'='*80}")
        print(f"πŸ“Š Preparing leaderboard and metrics data for upload...")
        print(f"{'='*80}\n")

        # Get leaderboard data from review metadata
        print("   Constructing leaderboard data from review metadata...")
        leaderboard_data = construct_leaderboard_from_metadata()

        # Get monthly metrics data (all agents, not just top N)
        print("   Calculating monthly metrics from review metadata...")
        monthly_metrics = calculate_monthly_metrics_by_agent(top_n=None)

        # Combine into a single structure
        combined_data = {
            "leaderboard": leaderboard_data,
            "monthly_metrics": monthly_metrics,
            "metadata": {
                "last_updated": datetime.now(timezone.utc).isoformat(),
                "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
                "total_agents": len(leaderboard_data)
            }
        }

        print(f"   Leaderboard entries: {len(leaderboard_data)}")
        print(f"   Monthly metrics for: {len(monthly_metrics['agents'])} agents")
        print(f"   Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")

        # Convert to JSON and create file-like object
        json_content = json.dumps(combined_data, indent=2)
        file_like_object = io.BytesIO(json_content.encode('utf-8'))

        # Upload to HuggingFace (will overwrite if exists)
        print(f"\nπŸ€— Uploading to {LEADERBOARD_REPO}...")
        upload_file_with_backoff(
            api=api,
            path_or_fileobj=file_like_object,
            path_in_repo="swe-review.json",
            repo_id=LEADERBOARD_REPO,
            repo_type="dataset",
            token=token,
            commit_message=f"Update leaderboard data - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC"
        )

        print(f"   βœ“ Successfully uploaded swe-review.json")
        print(f"{'='*80}\n")

        return True

    except Exception as e:
        print(f"βœ— Error saving leaderboard and metrics: {str(e)}")
        import traceback
        traceback.print_exc()
        return False



# =============================================================================
# DATA MANAGEMENT
# =============================================================================

def mine_all_agents():
    """
    Mine review metadata for all agents within UPDATE_TIME_FRAME_DAYS and save to HuggingFace.
    Uses BATCHED BigQuery queries for all agents (efficient approach).
    """
    # Load agent metadata from HuggingFace
    agents = load_agents_from_hf()
    if not agents:
        print("No agents found in HuggingFace dataset")
        return

    # Extract all identifiers
    identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
    if not identifiers:
        print("No valid agent identifiers found")
        return

    print(f"\n{'='*80}")
    print(f"Starting review metadata mining for {len(identifiers)} agents")
    print(f"Time frame: Last {UPDATE_TIME_FRAME_DAYS} days")
    print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)")
    print(f"{'='*80}\n")

    # Initialize BigQuery client
    try:
        client = get_bigquery_client()
    except Exception as e:
        print(f"βœ— Failed to initialize BigQuery client: {str(e)}")
        return

    # Define time range: past UPDATE_TIME_FRAME_DAYS (excluding today)
    current_time = datetime.now(timezone.utc)
    end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
    start_date = end_date - timedelta(days=UPDATE_TIME_FRAME_DAYS)

    try:
        # Use batched approach for better performance
        # upload_immediately=True means each batch uploads to HuggingFace right after BigQuery completes
        all_metadata = fetch_all_pr_metadata_batched(
            client, identifiers, start_date, end_date, batch_size=50, upload_immediately=True
        )

        # Calculate summary statistics
        total_prs = sum(len(metadata_list) for metadata_list in all_metadata.values())
        agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list)

        print(f"\n{'='*80}")
        print(f"βœ… BigQuery mining and upload complete!")
        print(f"   Total agents: {len(agents)}")
        print(f"   Agents with data: {agents_with_data}")
        print(f"   Total PRs found: {total_prs}")
        print(f"{'='*80}\n")

    except Exception as e:
        print(f"βœ— Error during BigQuery fetch: {str(e)}")
        import traceback
        traceback.print_exc()
        return

    # After mining is complete, save leaderboard and metrics to HuggingFace
    print(f"πŸ“€ Uploading leaderboard and metrics data...")
    if save_leaderboard_and_metrics_to_hf():
        print(f"βœ“ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}")
    else:
        print(f"⚠️ Failed to upload leaderboard and metrics data")


def construct_leaderboard_from_metadata():
    """
    Construct leaderboard from stored review metadata instead of fetching all reviews.
    Much more memory-efficient and faster.

    Returns dictionary of agent stats.
    """
    print("πŸ“Š Constructing leaderboard from review metadata...")

    # Load agents
    agents = load_agents_from_hf()
    if not agents:
        print("⚠️ No agents found")
        return {}

    print(f"βœ“ Loaded {len(agents)} agents")

    # Load all review metadata
    all_metadata = load_review_metadata()
    print(f"βœ“ Loaded {len(all_metadata)} review metadata entries")

    cache_dict = {}

    for agent in agents:
        identifier = agent.get('github_identifier')
        agent_name = agent.get('name', 'Unknown')

        # Filter metadata for this agent
        agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]

        # Calculate stats
        stats = calculate_review_stats_from_metadata(agent_metadata)

        cache_dict[identifier] = {
            'name': agent_name,
            'website': agent.get('website', 'N/A'),
            'github_identifier': identifier,
            **stats
        }

    print(f"βœ“ Constructed cache with {len(cache_dict)} agent entries")

    return cache_dict


# =============================================================================
# UI FUNCTIONS
# =============================================================================

def create_monthly_metrics_plot(top_n=5):
    """
    Create a Plotly figure with dual y-axes showing:
    - Left y-axis: Acceptance Rate (%) as line curves
    - Right y-axis: Total Reviews created as bar charts

    Each agent gets a unique color for both their line and bars.

    Args:
        top_n: Number of top agents to show (default: 5)
    """
    # Try loading from saved dataset first
    saved_data = load_leaderboard_data_from_hf()

    if saved_data and 'monthly_metrics' in saved_data:
        metrics = saved_data['monthly_metrics']
        print(f"πŸ“ˆ Loaded monthly metrics from saved dataset")

        # Apply top_n filter if specified
        if top_n is not None and top_n > 0 and metrics.get('agents'):
            # Calculate total reviews for each agent
            agent_totals = []
            for agent_name in metrics['agents']:
                agent_data = metrics['data'].get(agent_name, {})
                total_reviews = sum(agent_data.get('total_reviews', []))
                agent_totals.append((agent_name, total_reviews))

            # Sort by total reviews and take top N
            agent_totals.sort(key=lambda x: x[1], reverse=True)
            top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]

            # Filter metrics to only include top agents
            metrics = {
                'agents': top_agents,
                'months': metrics['months'],
                'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
            }
    else:
        # Fallback: calculate from metadata if saved data doesn't exist
        print(f"πŸ“ˆ Saved data not available, calculating monthly metrics from metadata...")
        metrics = calculate_monthly_metrics_by_agent(top_n=top_n)

    if not metrics['agents'] or not metrics['months']:
        # Return an empty figure with a message
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for visualization",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=16)
        )
        fig.update_layout(
            title=None,
            xaxis_title=None,
            height=500
        )
        return fig

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Generate unique colors for many agents using HSL color space
    def generate_color(index, total):
        """Generate distinct colors using HSL color space for better distribution"""
        hue = (index * 360 / total) % 360
        saturation = 70 + (index % 3) * 10  # Vary saturation slightly
        lightness = 45 + (index % 2) * 10   # Vary lightness slightly
        return f'hsl({hue}, {saturation}%, {lightness}%)'
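    # Illustrative outputs of generate_color for total=5 (computed from the formulas above):
    #   index 0 -> 'hsl(0.0, 70%, 45%)', index 1 -> 'hsl(72.0, 80%, 55%)'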

    agents = metrics['agents']
    months = metrics['months']
    data = metrics['data']

    # Generate colors for all agents
    agent_colors = {agent: generate_color(idx, len(agents)) for idx, agent in enumerate(agents)}

    # Add traces for each agent
    for idx, agent_name in enumerate(agents):
        color = agent_colors[agent_name]
        agent_data = data[agent_name]

        # Add line trace for acceptance rate (left y-axis)
        acceptance_rates = agent_data['acceptance_rates']
        # Filter out None values for plotting
        x_acceptance = [month for month, rate in zip(months, acceptance_rates) if rate is not None]
        y_acceptance = [rate for rate in acceptance_rates if rate is not None]

        if x_acceptance and y_acceptance:  # Only add trace if there's data
            fig.add_trace(
                go.Scatter(
                    x=x_acceptance,
                    y=y_acceptance,
                    name=agent_name,
                    mode='lines+markers',
                    line=dict(color=color, width=2),
                    marker=dict(size=8),
                    legendgroup=agent_name,
                    showlegend=(top_n is not None and top_n <= 10),  # Show legend for top N agents
                    hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
                                 'Month: %{x}<br>' +
                                 'Acceptance Rate: %{y:.2f}%<br>' +
                                 '<extra></extra>'
                ),
                secondary_y=False
            )

        # Add bar trace for total reviews (right y-axis)
        # Only show bars for months where agent has reviews
        x_bars = []
        y_bars = []
        for month, count in zip(months, agent_data['total_reviews']):
            if count > 0:  # Only include months with reviews
                x_bars.append(month)
                y_bars.append(count)

        if x_bars and y_bars:  # Only add trace if there's data
            fig.add_trace(
                go.Bar(
                    x=x_bars,
                    y=y_bars,
                    name=agent_name,
                    marker=dict(color=color, opacity=0.6),
                    legendgroup=agent_name,
                    showlegend=False,  # Hide duplicate legend entry (already shown in Scatter)
                    hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
                                 'Month: %{x}<br>' +
                                 'Total Reviews: %{y}<br>' +
                                 '<extra></extra>',
                    offsetgroup=agent_name  # Group bars by agent for proper spacing
                ),
                secondary_y=True
            )

    # Update axes labels
    fig.update_xaxes(title_text=None)
    fig.update_yaxes(
        title_text="<b>Acceptance Rate (%)</b>",
        range=[0, 100],
        secondary_y=False,
        showticklabels=True,
        tickmode='linear',
        dtick=10,
        showgrid=True
    )
    fig.update_yaxes(title_text="<b>Total Reviews</b>", secondary_y=True)

    # Update layout
    show_legend = (top_n is not None and top_n <= 10)
    fig.update_layout(
        title=None,
        hovermode='closest',  # Show individual agent info on hover
        barmode='group',
        height=600,
        showlegend=show_legend,
        margin=dict(l=50, r=150 if show_legend else 50, t=50, b=50)  # More right margin when legend is shown
    )

    return fig


def get_leaderboard_dataframe():
    """
    Load leaderboard from saved dataset and convert to pandas DataFrame for display.
    Falls back to constructing from metadata if saved data is not available.
    Returns formatted DataFrame sorted by total reviews.
    """
    # Try loading from saved dataset first
    saved_data = load_leaderboard_data_from_hf()

    if saved_data and 'leaderboard' in saved_data:
        cache_dict = saved_data['leaderboard']
        print(f"πŸ“Š Loaded leaderboard from saved dataset (last updated: {saved_data.get('last_updated', 'Unknown')})")
    else:
        # Fallback: construct from metadata if saved data doesn't exist
        print(f"πŸ“Š Saved data not available, constructing leaderboard from metadata...")
        cache_dict = construct_leaderboard_from_metadata()

    print(f"πŸ“Š Cache dict size: {len(cache_dict)}")

    if not cache_dict:
        print("⚠️ WARNING: cache_dict is empty!")
        # Return empty DataFrame with correct columns if no data
        column_names = [col[0] for col in LEADERBOARD_COLUMNS]
        return pd.DataFrame(columns=column_names)

    rows = []
    filtered_count = 0
    for identifier, data in cache_dict.items():
        total_reviews = data.get('total_reviews', 0)
        print(f"   Agent '{identifier}': {total_reviews} reviews")

        # Filter out agents with zero total reviews
        if total_reviews == 0:
            filtered_count += 1
            continue

        # Only include display-relevant fields
        rows.append([
            data.get('name', 'Unknown'),
            data.get('website', 'N/A'),
            total_reviews,
            data.get('merged_prs', 0),
            data.get('acceptance_rate', 0.0),
        ])

    print(f"πŸ“‰ Filtered out {filtered_count} agents with 0 reviews")
    print(f"πŸ“ˆ Leaderboard will show {len(rows)} agents")

    # Create DataFrame
    column_names = [col[0] for col in LEADERBOARD_COLUMNS]
    df = pd.DataFrame(rows, columns=column_names)

    # Ensure numeric types
    numeric_cols = ["Total Reviews", "Merged PRs", "Acceptance Rate (%)"]
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Sort by Total Reviews descending
    if "Total Reviews" in df.columns and not df.empty:
        df = df.sort_values(by="Total Reviews", ascending=False).reset_index(drop=True)

    print(f"βœ… Final DataFrame shape: {df.shape}")
    print("="*60 + "\n")

    return df


def submit_agent(identifier, agent_name, developer, website):
    """
    Submit a new agent to the leaderboard.
    Validates input, saves submission, and fetches PR metadata (memory-efficient).
    """
    # Validate required fields
    if not identifier or not identifier.strip():
        return "❌ GitHub identifier is required", get_leaderboard_dataframe()
    if not agent_name or not agent_name.strip():
        return "❌ Agent name is required", get_leaderboard_dataframe()
    if not developer or not developer.strip():
        return "❌ Developer name is required", get_leaderboard_dataframe()
    if not website or not website.strip():
        return "❌ Website URL is required", get_leaderboard_dataframe()

    # Clean inputs
    identifier = identifier.strip()
    agent_name = agent_name.strip()
    developer = developer.strip()
    website = website.strip()

    # Validate GitHub identifier
    is_valid, message = validate_github_username(identifier)
    if not is_valid:
        return f"❌ {message}", get_leaderboard_dataframe()

    # Check for duplicates by loading agents from HuggingFace
    agents = load_agents_from_hf()
    if agents:
        existing_names = {agent['github_identifier'] for agent in agents}
        if identifier in existing_names:
            return f"⚠️ Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe()

    # Create submission
    submission = {
        'name': agent_name,
        'developer': developer,
        'github_identifier': identifier,
        'website': website,
    }

    # Save to HuggingFace
    if not save_agent_to_hf(submission):
        return "❌ Failed to save submission", get_leaderboard_dataframe()

    # Reconstruct and save leaderboard data with new agent
    try:
        print(f"πŸ“Š Reconstructing leaderboard with new agent...")
        leaderboard_dict = construct_leaderboard_from_metadata()
        monthly_metrics = calculate_monthly_metrics_by_agent()
        save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
        print(f"βœ“ Leaderboard data updated")
    except Exception as e:
        print(f"⚠️ Failed to update leaderboard data: {str(e)}")

    # Return success message - review data will be populated by the next scheduled mining run
    return f"βœ… Successfully submitted {agent_name}! Review data will be populated by the next scheduled mining run.", get_leaderboard_dataframe()


# =============================================================================
# GRADIO APPLICATION
# =============================================================================

print(f"\nπŸš€ Starting SWE Agent PR Leaderboard")
print(f"   Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)")
print(f"   Mining update frequency: Every {UPDATE_TIME_FRAME_DAYS} days\n")

# Start APScheduler for monthly PR mining at 12:00 AM UTC every 1st of the month
scheduler = BackgroundScheduler(timezone="UTC")
scheduler.add_job(
    mine_all_agents,
    trigger=CronTrigger(day=1, hour=0, minute=0),  # 12:00 AM UTC every 1st of the month
    id='monthly_review_mining',
    name='Monthly Review Mining',
    replace_existing=True
)
scheduler.start()
print(f"\n{'='*80}")
print(f"βœ“ Scheduler initialized successfully")
print(f"⛏️  Mining schedule: Every 1st of the month at 12:00 AM UTC")
print(f"πŸ“₯ On startup: Only loads cached data from HuggingFace (no mining)")
print(f"{'='*80}\n")

# Create Gradio interface
with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
    total_months = LEADERBOARD_TIME_FRAME_DAYS // 30

    gr.Markdown("# πŸ† SWE Agent Review Leaderboard")
    gr.Markdown(f"Track and compare GitHub PR review acceptance statistics for SWE agents")
    
    with gr.Tabs():

        # Leaderboard Tab
        with gr.Tab("πŸ“Š Leaderboard"):
            gr.Markdown(f"*All statistics are based on reviews from the last {total_months} months*")
            leaderboard_table = Leaderboard(
                value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]),  # Empty initially
                datatype=LEADERBOARD_COLUMNS,
                search_columns=["Agent Name", "Website"],
                filter_columns=[
                    ColumnFilter(
                        "Acceptance Rate (%)",
                        min=0,
                        max=100,
                        default=[0, 100],
                        type="slider",
                        label="Acceptance Rate (%)"
                    )
                ]
            )
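
            # Note: the acceptance-rate slider above assumes the "Acceptance Rate (%)"
            # column holds percentages on a 0-100 scale (consistent with the 0-100
            # slider range and the values produced by get_leaderboard_dataframe()).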

            # Load leaderboard data when app starts
            app.load(
                fn=get_leaderboard_dataframe,
                inputs=[],
                outputs=[leaderboard_table]
            )

            # Monthly Metrics Section
            gr.Markdown("---")  # Divider
            gr.Markdown("### πŸ“ˆ Monthly Performance - Top 5 Agents")
            gr.Markdown("*Shows acceptance rate trends and review volumes for the most active agents*")

            monthly_metrics_plot = gr.Plot(label="Monthly Metrics")

            # Load monthly metrics when app starts
            app.load(
                fn=create_monthly_metrics_plot,
                inputs=[],
                outputs=[monthly_metrics_plot]
            )


        # Submit Agent Tab
        with gr.Tab("βž• Submit Agent"):
            
            gr.Markdown("### Submit Your Agent")
            gr.Markdown("Fill in the details below to add your agent to the leaderboard. Make sure you're logged in to HuggingFace CLI on your machine.")
            
            with gr.Row():
                with gr.Column():
                    github_input = gr.Textbox(
                        label="GitHub Identifier*",
                        placeholder="Your agent username (e.g., my-agent-bot)"
                    )
                    name_input = gr.Textbox(
                        label="Agent Name*",
                        placeholder="Your agent's display name"
                    )
                
                with gr.Column():
                    developer_input = gr.Textbox(
                        label="Developer*",
                        placeholder="Your developer or team name"
                    )
                    website_input = gr.Textbox(
                        label="Website*",
                        placeholder="https://your-agent-website.com"
                    )
            
            submit_button = gr.Button(
                "Submit Agent",
                variant="primary"
            )
            submission_status = gr.Textbox(
                label="Submission Status",
                interactive=False
            )
            
            # Event handler
            submit_button.click(
                fn=submit_agent,
                inputs=[github_input, name_input, developer_input, website_input],
                outputs=[submission_status, leaderboard_table]
            )


# Launch application
if __name__ == "__main__":
    app.launch()
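    # Note: app.launch() is kept with Gradio defaults; for local debugging one could
    # pass options such as server_name="0.0.0.0" or server_port=7860 (illustrative,
    # not part of the original configuration).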