Spaces:
Runtime error
Runtime error
fix elo scoring
Browse files- calculate_elo.py +69 -35
calculate_elo.py
CHANGED
|
@@ -4,7 +4,8 @@ from datetime import datetime
|
|
| 4 |
from decimal import Decimal
|
| 5 |
|
| 6 |
import boto3
|
| 7 |
-
from boto3.dynamodb.conditions import Attr
|
|
|
|
| 8 |
|
| 9 |
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
|
| 10 |
|
|
@@ -89,6 +90,10 @@ def _create_elo_logs_table():
|
|
| 89 |
'AttributeName': 'arena_battle_id',
|
| 90 |
'KeyType': 'HASH' # Partition key
|
| 91 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
],
|
| 93 |
AttributeDefinitions=[
|
| 94 |
{
|
|
@@ -99,6 +104,10 @@ def _create_elo_logs_table():
|
|
| 99 |
'AttributeName': 'battle_timestamp',
|
| 100 |
'AttributeType': 'S'
|
| 101 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
],
|
| 103 |
ProvisionedThroughput={
|
| 104 |
'ReadCapacityUnits': 10,
|
|
@@ -106,12 +115,16 @@ def _create_elo_logs_table():
|
|
| 106 |
},
|
| 107 |
GlobalSecondaryIndexes=[
|
| 108 |
{
|
| 109 |
-
'IndexName': '
|
| 110 |
'KeySchema': [
|
| 111 |
{
|
| 112 |
-
'AttributeName': '
|
| 113 |
'KeyType': 'HASH' # Partition key for the GSI
|
| 114 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
],
|
| 116 |
'Projection': {
|
| 117 |
'ProjectionType': 'ALL'
|
|
@@ -157,36 +170,15 @@ def calculate_elo(rating1, rating2, result, K=32):
|
|
| 157 |
def get_last_processed_timestamp():
|
| 158 |
table = dynamodb.Table('elo_logs')
|
| 159 |
|
| 160 |
-
response = table.update (
|
| 161 |
-
AttributeDefinitions=[
|
| 162 |
-
{
|
| 163 |
-
'AttributeName': 'timestamp',
|
| 164 |
-
'AttributeType': 'S'
|
| 165 |
-
},
|
| 166 |
-
],
|
| 167 |
-
GlobalSecondaryIndexUpdates=[
|
| 168 |
-
{
|
| 169 |
-
'Create': {
|
| 170 |
-
'IndexName': 'TimestampIndex',
|
| 171 |
-
'KeySchema': [
|
| 172 |
-
{
|
| 173 |
-
'AttributeName': 'timestamp',
|
| 174 |
-
'KeyType': 'RANGE'
|
| 175 |
-
},
|
| 176 |
-
],
|
| 177 |
-
'Projection': {
|
| 178 |
-
'ProjectionType': 'ALL',
|
| 179 |
-
}
|
| 180 |
-
},
|
| 181 |
-
},
|
| 182 |
-
]
|
| 183 |
-
)
|
| 184 |
-
|
| 185 |
# Scan the table sorted by timestamp in descending order
|
| 186 |
-
response = table.
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
| 189 |
)
|
|
|
|
|
|
|
| 190 |
|
| 191 |
# If there are no items in the table, return a default timestamp
|
| 192 |
if not response['Items']:
|
|
@@ -207,7 +199,8 @@ def log_elo_update(arena_battle_id, battle_timestamp, new_rating1, new_rating2):
|
|
| 207 |
'battle_timestamp': battle_timestamp, # Use the timestamp of the battle
|
| 208 |
'log_timestamp': datetime.now().isoformat(), # Also store the timestamp of the log for completeness
|
| 209 |
'new_rating1': new_rating1,
|
| 210 |
-
'new_rating2': new_rating2
|
|
|
|
| 211 |
}
|
| 212 |
)
|
| 213 |
|
|
@@ -238,9 +231,41 @@ def update_elo_score(chatbot_name, new_elo_score):
|
|
| 238 |
)
|
| 239 |
|
| 240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
def main():
|
| 242 |
-
#
|
| 243 |
-
|
|
|
|
|
|
|
| 244 |
battles = get_unprocessed_battles(last_processed_timestamp)
|
| 245 |
|
| 246 |
elo_scores = {}
|
|
@@ -262,15 +287,24 @@ def main():
|
|
| 262 |
elo_result = 0
|
| 263 |
|
| 264 |
new_rating1, new_rating2 = calculate_elo(elo_scores[battle['choice1_name']], elo_scores[battle['choice2_name']], elo_result)
|
|
|
|
| 265 |
elo_scores[battle['choice1_name']] = new_rating1
|
| 266 |
elo_scores[battle['choice2_name']] = new_rating2
|
| 267 |
log_elo_update(battle['arena_battle_id'], battle['timestamp'], new_rating1, new_rating2)
|
| 268 |
-
logging.info(f"{battle['choice1_name']}: {elo_scores[battle['choice1_name']]} -> {new_rating1} | {battle['choice2_name']}: {elo_scores[battle['choice2_name']]} -> {new_rating2}")
|
| 269 |
update_elo_score(battle['choice1_name'], new_rating1)
|
| 270 |
update_elo_score(battle['choice2_name'], new_rating2)
|
| 271 |
elo_scores[battle['choice1_name']] = new_rating1
|
| 272 |
elo_scores[battle['choice2_name']] = new_rating2
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
if __name__ == "__main__":
|
| 276 |
main()
|
|
|
|
| 4 |
from decimal import Decimal
|
| 5 |
|
| 6 |
import boto3
|
| 7 |
+
from boto3.dynamodb.conditions import Attr, Key
|
| 8 |
+
from datasets import Dataset
|
| 9 |
|
| 10 |
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
|
| 11 |
|
|
|
|
| 90 |
'AttributeName': 'arena_battle_id',
|
| 91 |
'KeyType': 'HASH' # Partition key
|
| 92 |
},
|
| 93 |
+
{
|
| 94 |
+
'AttributeName': 'battle_timestamp',
|
| 95 |
+
'KeyType': 'RANGE' # Sort key
|
| 96 |
+
},
|
| 97 |
],
|
| 98 |
AttributeDefinitions=[
|
| 99 |
{
|
|
|
|
| 104 |
'AttributeName': 'battle_timestamp',
|
| 105 |
'AttributeType': 'S'
|
| 106 |
},
|
| 107 |
+
{
|
| 108 |
+
'AttributeName': 'all',
|
| 109 |
+
'AttributeType': 'S'
|
| 110 |
+
}
|
| 111 |
],
|
| 112 |
ProvisionedThroughput={
|
| 113 |
'ReadCapacityUnits': 10,
|
|
|
|
| 115 |
},
|
| 116 |
GlobalSecondaryIndexes=[
|
| 117 |
{
|
| 118 |
+
'IndexName': 'AllTimestampIndex',
|
| 119 |
'KeySchema': [
|
| 120 |
{
|
| 121 |
+
'AttributeName': 'all',
|
| 122 |
'KeyType': 'HASH' # Partition key for the GSI
|
| 123 |
},
|
| 124 |
+
{
|
| 125 |
+
'AttributeName': 'battle_timestamp',
|
| 126 |
+
'KeyType': 'RANGE' # Sort key for the GSI
|
| 127 |
+
}
|
| 128 |
],
|
| 129 |
'Projection': {
|
| 130 |
'ProjectionType': 'ALL'
|
|
|
|
| 170 |
def get_last_processed_timestamp():
|
| 171 |
table = dynamodb.Table('elo_logs')
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
# Query the AllTimestampIndex GSI in descending battle_timestamp order, fetching only the newest entry
|
| 174 |
+
response = table.query(
|
| 175 |
+
IndexName='AllTimestampIndex',
|
| 176 |
+
KeyConditionExpression=Key('all').eq('ALL'),
|
| 177 |
+
ScanIndexForward=False,
|
| 178 |
+
Limit=1
|
| 179 |
)
|
| 180 |
+
print(response)
|
| 181 |
+
# exit(0)
|
| 182 |
|
| 183 |
# If there are no items in the table, return a default timestamp
|
| 184 |
if not response['Items']:
|
|
|
|
| 199 |
'battle_timestamp': battle_timestamp, # Use the timestamp of the battle
|
| 200 |
'log_timestamp': datetime.now().isoformat(), # Also store the timestamp of the log for completeness
|
| 201 |
'new_rating1': new_rating1,
|
| 202 |
+
'new_rating2': new_rating2,
|
| 203 |
+
'all': 'ALL',
|
| 204 |
}
|
| 205 |
)
|
| 206 |
|
|
|
|
| 231 |
)
|
| 232 |
|
| 233 |
|
| 234 |
+
def get_elo_scores():
    """Return every item stored in the ``elo_scores`` DynamoDB table.

    A single ``scan`` call returns at most one page of results, so the scan
    is repeated with ``ExclusiveStartKey`` until DynamoDB no longer reports a
    ``LastEvaluatedKey``. Without this, items beyond the first page would be
    silently dropped.

    Returns:
        list: The items exactly as boto3 returns them (one dict per item);
        no type conversion is performed here.
    """
    table = dynamodb.Table('elo_scores')

    response = table.scan()
    # Copy into a fresh list so later pages can be appended to it.
    data = list(response['Items'])

    # 'LastEvaluatedKey' is present only when more pages remain.
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        data.extend(response['Items'])

    return data
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def _backfill_logs():
    """Set the ``all`` attribute to ``'ALL'`` on every item in ``elo_logs``.

    One-off maintenance helper: it walks the whole table and writes the
    constant ``all = 'ALL'`` onto each existing item, addressing the item by
    its ``arena_battle_id`` / ``battle_timestamp`` key pair.

    The scan is paginated via ``LastEvaluatedKey`` / ``ExclusiveStartKey``;
    a single ``scan`` call only returns the first page, which would leave
    the remaining items without the attribute.
    """
    table = dynamodb.Table('elo_logs')

    scan_kwargs = {}
    while True:
        response = table.scan(**scan_kwargs)

        for item in response['Items']:
            table.update_item(
                Key={
                    'arena_battle_id': item['arena_battle_id'],
                    'battle_timestamp': item['battle_timestamp'],
                },
                # 'all' is aliased through '#all' because ALL is a DynamoDB
                # reserved word and cannot appear literally in an expression.
                UpdateExpression="SET #all = :value",
                ExpressionAttributeNames={
                    '#all': 'all',
                },
                ExpressionAttributeValues={
                    ':value': 'ALL',
                },
            )

        # No 'LastEvaluatedKey' means the final page has been processed.
        last_key = response.get('LastEvaluatedKey')
        if last_key is None:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key
|
| 263 |
+
|
| 264 |
def main():
|
| 265 |
+
# _backfill_logs()
|
| 266 |
+
# _create_elo_logs_table()
|
| 267 |
+
last_processed_timestamp = get_last_processed_timestamp()
|
| 268 |
+
# last_processed_timestamp = '1970-01-01T00:00:00'
|
| 269 |
battles = get_unprocessed_battles(last_processed_timestamp)
|
| 270 |
|
| 271 |
elo_scores = {}
|
|
|
|
| 287 |
elo_result = 0
|
| 288 |
|
| 289 |
new_rating1, new_rating2 = calculate_elo(elo_scores[battle['choice1_name']], elo_scores[battle['choice2_name']], elo_result)
|
| 290 |
+
logging.info(f"{battle['choice1_name']}: {elo_scores[battle['choice1_name']]} -> {new_rating1} | {battle['choice2_name']}: {elo_scores[battle['choice2_name']]} -> {new_rating2}")
|
| 291 |
elo_scores[battle['choice1_name']] = new_rating1
|
| 292 |
elo_scores[battle['choice2_name']] = new_rating2
|
| 293 |
log_elo_update(battle['arena_battle_id'], battle['timestamp'], new_rating1, new_rating2)
|
|
|
|
| 294 |
update_elo_score(battle['choice1_name'], new_rating1)
|
| 295 |
update_elo_score(battle['choice2_name'], new_rating2)
|
| 296 |
elo_scores[battle['choice1_name']] = new_rating1
|
| 297 |
elo_scores[battle['choice2_name']] = new_rating2
|
| 298 |
|
| 299 |
+
elo_scores = get_elo_scores()
|
| 300 |
+
for i, j in enumerate(elo_scores):
|
| 301 |
+
j["elo_score"] = float(j["elo_score"])
|
| 302 |
+
elo_scores[i] = j
|
| 303 |
+
|
| 304 |
+
# Convert the data into a format suitable for Hugging Face Dataset
|
| 305 |
+
elo_dataset = Dataset.from_list(elo_scores)
|
| 306 |
+
elo_dataset.push_to_hub("openaccess-ai-collective/chatbot-arena-elo-scores", private=False)
|
| 307 |
+
|
| 308 |
|
| 309 |
if __name__ == "__main__":
|
| 310 |
main()
|