milwright committed on
Commit
de88fe1
·
1 Parent(s): ef943a5

Optimize AI word selection to reduce fallback rate

Browse files

- Relax overly strict prompt constraints that caused AI to return empty arrays
- Change "NEVER select from first/last sentence" to "PREFER middle portions"
- Add instruction to prioritize returning something over nothing
- Improve caps detection to exclude purely numeric tokens and add debug logging
- Add rejection for consecutive all-caps lines (title pages, copyright notices)
- Increase caps penalty weight from 30 to 100 for better filtering

Files changed (2) hide show
  1. src/aiService.js +2 -2
  2. src/clozeGameEngine.js +35 -2
src/aiService.js CHANGED
@@ -170,8 +170,8 @@ REQUIREMENTS:
170
  - Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
171
  - Skip any words that look malformed or concatenated
172
  - Avoid dated or potentially offensive terms
173
- - NEVER select words from the first or last sentence/clause of the passage
174
- - Choose words from the middle portions for better context dependency
175
 
176
  Return ONLY a JSON array of the selected words.
177
 
 
170
  - Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
171
  - Skip any words that look malformed or concatenated
172
  - Avoid dated or potentially offensive terms
173
+ - PREFER words from the middle portions of the passage when possible
174
+ - If struggling to find ${count} perfect words, prioritize returning SOMETHING over returning nothing
175
 
176
  Return ONLY a JSON array of the selected words.
177
 
src/clozeGameEngine.js CHANGED
@@ -156,7 +156,7 @@ class ClozeGame {
156
  const totalWords = words.length;
157
 
158
  // Count various quality indicators
159
- const capsWords = words.filter(w => w.length > 1 && w === w.toUpperCase());
160
  const capsCount = capsWords.length;
161
  const numbersCount = words.filter(w => /\d/.test(w)).length;
162
  const shortWords = words.filter(w => w.length <= 3).length;
@@ -164,6 +164,12 @@ class ClozeGame {
164
  const sentenceList = passage.split(/[.!?]+/).filter(s => s.trim().length > 10);
165
  const lines = passage.split('\n').filter(l => l.trim());
166
 
 
 
 
 
 
 
167
  // Count excessive dashes (n-dashes, m-dashes, hyphens in sequence)
168
  const dashSequences = (passage.match(/[-—–]{3,}/g) || []).length;
169
  const totalDashes = (passage.match(/[-—–]/g) || []).length;
@@ -205,6 +211,19 @@ class ClozeGame {
205
  const titlePattern = /^[A-Z][A-Z\s]+$/m;
206
  const titleLines = lines.filter(line => titlePattern.test(line.trim())).length;
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  // Calculate quality ratios
209
  const capsRatio = capsCount / totalWords;
210
  const numbersRatio = numbersCount / totalWords;
@@ -230,7 +249,21 @@ class ClozeGame {
230
  let qualityScore = 0;
231
  let issues = [];
232
 
233
- if (capsRatio > capsThreshold) { qualityScore += capsRatio * 30; issues.push(`caps: ${Math.round(capsRatio * 100)}%`); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  if (numbersRatio > numbersThreshold) { qualityScore += numbersRatio * 40; issues.push(`numbers: ${Math.round(numbersRatio * 100)}%`); }
235
  if (punctuationRatio > 0.08) { qualityScore += punctuationRatio * 15; issues.push(`punct: ${Math.round(punctuationRatio * 100)}%`); }
236
  if (avgWordsPerSentence < 8 || avgWordsPerSentence > 40) { qualityScore += 2; issues.push(`sent-len: ${Math.round(avgWordsPerSentence)}`); }
 
156
  const totalWords = words.length;
157
 
158
  // Count various quality indicators
159
+ const capsWords = words.filter(w => w.length > 1 && w === w.toUpperCase() && !/^\d+$/.test(w));
160
  const capsCount = capsWords.length;
161
  const numbersCount = words.filter(w => /\d/.test(w)).length;
162
  const shortWords = words.filter(w => w.length <= 3).length;
 
164
  const sentenceList = passage.split(/[.!?]+/).filter(s => s.trim().length > 10);
165
  const lines = passage.split('\n').filter(l => l.trim());
166
 
167
+ // Debug logging for caps detection
168
+ if (capsCount > 5) {
169
+ console.log(`High caps count detected: ${capsCount}/${totalWords} words (${Math.round((capsCount/totalWords) * 100)}%)`);
170
+ console.log(`Sample caps words:`, capsWords.slice(0, 10));
171
+ }
172
+
173
  // Count excessive dashes (n-dashes, m-dashes, hyphens in sequence)
174
  const dashSequences = (passage.match(/[-—–]{3,}/g) || []).length;
175
  const totalDashes = (passage.match(/[-—–]/g) || []).length;
 
211
  const titlePattern = /^[A-Z][A-Z\s]+$/m;
212
  const titleLines = lines.filter(line => titlePattern.test(line.trim())).length;
213
 
214
+ // Check for consecutive all-caps lines (title pages, copyright notices)
215
+ let consecutiveCapsLines = 0;
216
+ let maxConsecutiveCaps = 0;
217
+ lines.forEach(line => {
218
+ const trimmed = line.trim();
219
+ if (trimmed.length > 3 && trimmed === trimmed.toUpperCase() && !/^\d+$/.test(trimmed)) {
220
+ consecutiveCapsLines++;
221
+ maxConsecutiveCaps = Math.max(maxConsecutiveCaps, consecutiveCapsLines);
222
+ } else {
223
+ consecutiveCapsLines = 0;
224
+ }
225
+ });
226
+
227
  // Calculate quality ratios
228
  const capsRatio = capsCount / totalWords;
229
  const numbersRatio = numbersCount / totalWords;
 
249
  let qualityScore = 0;
250
  let issues = [];
251
 
252
+ // Immediate rejection for excessive caps (title pages, headers, etc)
253
+ if (capsRatio > 0.15) {
254
+ console.log(`Rejecting passage with excessive caps: ${Math.round(capsRatio * 100)}%`);
255
+ attempts++;
256
+ continue;
257
+ }
258
+
259
+ // Immediate rejection for consecutive all-caps lines (title pages, copyright)
260
+ if (maxConsecutiveCaps >= 2) {
261
+ console.log(`Rejecting passage with ${maxConsecutiveCaps} consecutive all-caps lines`);
262
+ attempts++;
263
+ continue;
264
+ }
265
+
266
+ if (capsRatio > capsThreshold) { qualityScore += capsRatio * 100; issues.push(`caps: ${Math.round(capsRatio * 100)}%`); }
267
  if (numbersRatio > numbersThreshold) { qualityScore += numbersRatio * 40; issues.push(`numbers: ${Math.round(numbersRatio * 100)}%`); }
268
  if (punctuationRatio > 0.08) { qualityScore += punctuationRatio * 15; issues.push(`punct: ${Math.round(punctuationRatio * 100)}%`); }
269
  if (avgWordsPerSentence < 8 || avgWordsPerSentence > 40) { qualityScore += 2; issues.push(`sent-len: ${Math.round(avgWordsPerSentence)}`); }