File size: 3,304 Bytes
509e21e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env node
const fs = require('fs')
const path = require('path')

// Directory scanned by main() for evaluation JSON files; resolved relative to
// this script's own directory as ../public/evaluations.
const EVAL_DIR = path.join(__dirname, '..', 'public', 'evaluations')

// Keys emitted, in this order, by normalizeSource() for a benchmark source
// (isProcess = false). Keys not listed here are not copied to the output.
const benchmarkFields = [
  'id',
  'url',
  'description',
  'sourceType',
  'benchmarkName',
  'metrics',
  'score',
  'version',
  'taskVariants',
  'customFields',
]

// Keys emitted, in this order, by normalizeSource() for a process source
// (isProcess = true). Keys not listed here are not copied to the output.
const processFields = [
  'id',
  'url',
  'description',
  'sourceType',
  'documentType',
  'customFields',
]

/**
 * Generate a fresh identifier of the form `<prefix>-<time>-<random>`.
 *
 * The time part is the current epoch milliseconds in base 36; the random part
 * is up to six base-36 characters taken from Math.random(). The result is not
 * cryptographically secure and uniqueness is only probabilistic.
 *
 * @param {string} [prefix='s'] - Leading label placed before the first dash.
 * @returns {string} The generated identifier.
 */
function ensureId(prefix = 's') {
  const timePart = Date.now().toString(36)
  // Skip the leading "0." of the base-36 fraction and keep at most six characters.
  const randomPart = Math.random().toString(36).slice(2, 8)
  return `${prefix}-${timePart}-${randomPart}`
}

/**
 * Coerce a raw source entry into a record containing exactly the expected keys.
 *
 * - A string input is treated as a source whose only content is `description`.
 * - Any other non-object input (including null/undefined) yields an empty record.
 * - `id` is kept when truthy, otherwise a new one is generated with a
 *   `proc` or `bench` prefix.
 * - `customFields` is kept when it is a truthy object, otherwise replaced by `{}`.
 * - Every other listed key is copied, with null/undefined replaced by ''.
 * - Keys that are not in the selected field list are dropped.
 *
 * @param {*} obj - Raw source entry.
 * @param {boolean} [isProcess=false] - Use processFields instead of benchmarkFields.
 * @returns {Object} A new record whose keys follow the selected field list order.
 */
function normalizeSource(obj, isProcess = false) {
  // A bare string is shorthand for "a source that only has a description".
  let source = typeof obj === 'string' ? { description: obj } : obj
  if (source == null || typeof source !== 'object') source = {}

  const fieldNames = isProcess ? processFields : benchmarkFields
  const result = {}

  for (const key of fieldNames) {
    switch (key) {
      case 'id':
        // Only generate an id when the entry does not already carry one.
        result.id = source.id || ensureId(isProcess ? 'proc' : 'bench')
        break
      case 'customFields': {
        const custom = source.customFields
        result.customFields = custom && typeof custom === 'object' ? custom : {}
        break
      }
      default: {
        const value = source[key]
        result[key] = value != null ? value : ''
      }
    }
  }

  return result
}

/**
 * Make sure `target[key]` holds a value, defaulting it to an empty object.
 *
 * @param {Object} target - Object to update in place.
 * @param {string} key - Property that must exist.
 * @returns {boolean} True when the property was missing/falsy and had to be created.
 */
function ensureObjectField(target, key) {
  if (target[key]) return false
  target[key] = {}
  return true
}

/**
 * Normalize one questionId -> sources mapping of a category in place.
 *
 * Each value is coerced to an array (a single truthy value becomes a
 * one-element array, a falsy value becomes an empty array) and every element
 * is passed through normalizeSource().
 *
 * @param {Object} catData - Category record that owns the mapping.
 * @param {string} key - Name of the mapping ('benchmarkSources' or 'processSources').
 * @param {boolean} isProcess - Forwarded to normalizeSource().
 * @returns {boolean} True when anything stored under `key` was modified.
 */
function normalizeSourceGroup(catData, key, isProcess) {
  let changed = ensureObjectField(catData, key)
  const group = catData[key]

  for (const [qid, raw] of Object.entries(group)) {
    const list = Array.isArray(raw) ? raw : raw ? [raw] : []
    const normalized = list.map((s) => normalizeSource(s, isProcess))
    // Compare against the value as stored in the file, not the coerced list:
    // otherwise wrapping a single object (or replacing null) with an array
    // would go unnoticed and never be written back.
    if (JSON.stringify(raw) !== JSON.stringify(normalized)) changed = true
    group[qid] = normalized
  }

  return changed
}

/**
 * Normalize the benchmark/process sources of one evaluation JSON file and
 * rewrite the file when its content changed.
 *
 * For every object under `categoryEvaluations`, the `benchmarkSources` and
 * `processSources` mappings are normalized, and `benchmarkAnswers` /
 * `processAnswers` are created as empty objects when absent (existing answers
 * are left untouched). The file is rewritten with 2-space indentation and a
 * trailing newline only if one of those steps modified the data.
 *
 * Invalid JSON is reported on stderr and skipped. Errors from reading or
 * writing the file are not caught here.
 *
 * @param {string} filePath - Path of the JSON file to process.
 * @returns {boolean} True when the file was rewritten, false otherwise.
 */
function normalizeFile(filePath) {
  const content = fs.readFileSync(filePath, 'utf8')
  let data
  try {
    data = JSON.parse(content)
  } catch (e) {
    console.error(`Skipping ${filePath}: invalid JSON`)
    return false
  }

  // Valid JSON may still be null or a primitive; treat that as "nothing to normalize".
  const categories = data && typeof data === 'object' ? data.categoryEvaluations : null

  let changed = false

  if (categories && typeof categories === 'object') {
    for (const catData of Object.values(categories)) {
      if (!catData || typeof catData !== 'object') continue

      // benchmarkSources / processSources: object mapping questionId -> array
      if (normalizeSourceGroup(catData, 'benchmarkSources', false)) changed = true
      if (normalizeSourceGroup(catData, 'processSources', true)) changed = true

      // ensure answers exist as objects but don't alter existing answers;
      // a created container counts as a change so that it is persisted.
      if (ensureObjectField(catData, 'benchmarkAnswers')) changed = true
      if (ensureObjectField(catData, 'processAnswers')) changed = true
    }
  }

  if (changed) {
    fs.writeFileSync(filePath, JSON.stringify(data, null, 2) + '\n')
    console.log(`Updated: ${path.basename(filePath)}`)
    return true
  }

  console.log(`Unchanged: ${path.basename(filePath)}`)
  return false
}

/**
 * Run normalizeFile() on every `.json` entry found directly inside EVAL_DIR.
 *
 * A failure while processing one file is logged to stderr and does not stop
 * the remaining files from being processed. An error from listing the
 * directory itself is not caught.
 */
function main() {
  for (const name of fs.readdirSync(EVAL_DIR)) {
    if (!name.endsWith('.json')) continue

    const target = path.join(EVAL_DIR, name)
    try {
      normalizeFile(target)
    } catch (err) {
      console.error(`Error processing ${name}:`, err.message)
    }
  }
}

// Script entry point: run the normalization as soon as this file is executed.
main()