Spaces:
Running
Running
milwright
committed on
Commit
·
de88fe1
1
Parent(s):
ef943a5
Optimize AI word selection to reduce fallback rate
Browse files
- Relax overly strict prompt constraints that caused AI to return empty arrays
- Change "NEVER select from first/last sentence" to "PREFER middle portions"
- Add instruction to prioritize returning something over nothing
- Improve caps detection to exclude numbers and add debug logging
- Add rejection for consecutive all-caps lines (title pages, copyright notices)
- Increase caps penalty weight from 30 to 100 for better filtering
- src/aiService.js +2 -2
- src/clozeGameEngine.js +35 -2
src/aiService.js
CHANGED
|
@@ -170,8 +170,8 @@ REQUIREMENTS:
|
|
| 170 |
- Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
|
| 171 |
- Skip any words that look malformed or concatenated
|
| 172 |
- Avoid dated or potentially offensive terms
|
| 173 |
-
-
|
| 174 |
-
-
|
| 175 |
|
| 176 |
Return ONLY a JSON array of the selected words.
|
| 177 |
|
|
|
|
| 170 |
- Avoid: capitalized words, ALL-CAPS words, function words, archaic terms, proper nouns, technical jargon
|
| 171 |
- Skip any words that look malformed or concatenated
|
| 172 |
- Avoid dated or potentially offensive terms
|
| 173 |
+
- PREFER words from the middle portions of the passage when possible
|
| 174 |
+
- If struggling to find ${count} perfect words, prioritize returning SOMETHING over returning nothing
|
| 175 |
|
| 176 |
Return ONLY a JSON array of the selected words.
|
| 177 |
|
src/clozeGameEngine.js
CHANGED
|
@@ -156,7 +156,7 @@ class ClozeGame {
|
|
| 156 |
const totalWords = words.length;
|
| 157 |
|
| 158 |
// Count various quality indicators
|
| 159 |
-
const capsWords = words.filter(w => w.length > 1 && w === w.toUpperCase());
|
| 160 |
const capsCount = capsWords.length;
|
| 161 |
const numbersCount = words.filter(w => /\d/.test(w)).length;
|
| 162 |
const shortWords = words.filter(w => w.length <= 3).length;
|
|
@@ -164,6 +164,12 @@ class ClozeGame {
|
|
| 164 |
const sentenceList = passage.split(/[.!?]+/).filter(s => s.trim().length > 10);
|
| 165 |
const lines = passage.split('\n').filter(l => l.trim());
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
// Count excessive dashes (n-dashes, m-dashes, hyphens in sequence)
|
| 168 |
const dashSequences = (passage.match(/[-–—]{3,}/g) || []).length;
|
| 169 |
const totalDashes = (passage.match(/[-–—]/g) || []).length;
|
|
@@ -205,6 +211,19 @@ class ClozeGame {
|
|
| 205 |
const titlePattern = /^[A-Z][A-Z\s]+$/m;
|
| 206 |
const titleLines = lines.filter(line => titlePattern.test(line.trim())).length;
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
// Calculate quality ratios
|
| 209 |
const capsRatio = capsCount / totalWords;
|
| 210 |
const numbersRatio = numbersCount / totalWords;
|
|
@@ -230,7 +249,21 @@ class ClozeGame {
|
|
| 230 |
let qualityScore = 0;
|
| 231 |
let issues = [];
|
| 232 |
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
if (numbersRatio > numbersThreshold) { qualityScore += numbersRatio * 40; issues.push(`numbers: ${Math.round(numbersRatio * 100)}%`); }
|
| 235 |
if (punctuationRatio > 0.08) { qualityScore += punctuationRatio * 15; issues.push(`punct: ${Math.round(punctuationRatio * 100)}%`); }
|
| 236 |
if (avgWordsPerSentence < 8 || avgWordsPerSentence > 40) { qualityScore += 2; issues.push(`sent-len: ${Math.round(avgWordsPerSentence)}`); }
|
|
|
|
| 156 |
const totalWords = words.length;
|
| 157 |
|
| 158 |
// Count various quality indicators
|
| 159 |
+
const capsWords = words.filter(w => w.length > 1 && w === w.toUpperCase() && !/^\d+$/.test(w));
|
| 160 |
const capsCount = capsWords.length;
|
| 161 |
const numbersCount = words.filter(w => /\d/.test(w)).length;
|
| 162 |
const shortWords = words.filter(w => w.length <= 3).length;
|
|
|
|
| 164 |
const sentenceList = passage.split(/[.!?]+/).filter(s => s.trim().length > 10);
|
| 165 |
const lines = passage.split('\n').filter(l => l.trim());
|
| 166 |
|
| 167 |
+
// Debug logging for caps detection
|
| 168 |
+
if (capsCount > 5) {
|
| 169 |
+
console.log(`High caps count detected: ${capsCount}/${totalWords} words (${Math.round((capsCount/totalWords) * 100)}%)`);
|
| 170 |
+
console.log(`Sample caps words:`, capsWords.slice(0, 10));
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
// Count excessive dashes (n-dashes, m-dashes, hyphens in sequence)
|
| 174 |
const dashSequences = (passage.match(/[-–—]{3,}/g) || []).length;
|
| 175 |
const totalDashes = (passage.match(/[-–—]/g) || []).length;
|
|
|
|
| 211 |
const titlePattern = /^[A-Z][A-Z\s]+$/m;
|
| 212 |
const titleLines = lines.filter(line => titlePattern.test(line.trim())).length;
|
| 213 |
|
| 214 |
+
// Check for consecutive all-caps lines (title pages, copyright notices)
|
| 215 |
+
let consecutiveCapsLines = 0;
|
| 216 |
+
let maxConsecutiveCaps = 0;
|
| 217 |
+
lines.forEach(line => {
|
| 218 |
+
const trimmed = line.trim();
|
| 219 |
+
if (trimmed.length > 3 && trimmed === trimmed.toUpperCase() && !/^\d+$/.test(trimmed)) {
|
| 220 |
+
consecutiveCapsLines++;
|
| 221 |
+
maxConsecutiveCaps = Math.max(maxConsecutiveCaps, consecutiveCapsLines);
|
| 222 |
+
} else {
|
| 223 |
+
consecutiveCapsLines = 0;
|
| 224 |
+
}
|
| 225 |
+
});
|
| 226 |
+
|
| 227 |
// Calculate quality ratios
|
| 228 |
const capsRatio = capsCount / totalWords;
|
| 229 |
const numbersRatio = numbersCount / totalWords;
|
|
|
|
| 249 |
let qualityScore = 0;
|
| 250 |
let issues = [];
|
| 251 |
|
| 252 |
+
// Immediate rejection for excessive caps (title pages, headers, etc)
|
| 253 |
+
if (capsRatio > 0.15) {
|
| 254 |
+
console.log(`Rejecting passage with excessive caps: ${Math.round(capsRatio * 100)}%`);
|
| 255 |
+
attempts++;
|
| 256 |
+
continue;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
// Immediate rejection for consecutive all-caps lines (title pages, copyright)
|
| 260 |
+
if (maxConsecutiveCaps >= 2) {
|
| 261 |
+
console.log(`Rejecting passage with ${maxConsecutiveCaps} consecutive all-caps lines`);
|
| 262 |
+
attempts++;
|
| 263 |
+
continue;
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
if (capsRatio > capsThreshold) { qualityScore += capsRatio * 100; issues.push(`caps: ${Math.round(capsRatio * 100)}%`); }
|
| 267 |
if (numbersRatio > numbersThreshold) { qualityScore += numbersRatio * 40; issues.push(`numbers: ${Math.round(numbersRatio * 100)}%`); }
|
| 268 |
if (punctuationRatio > 0.08) { qualityScore += punctuationRatio * 15; issues.push(`punct: ${Math.round(punctuationRatio * 100)}%`); }
|
| 269 |
if (avgWordsPerSentence < 8 || avgWordsPerSentence > 40) { qualityScore += 2; issues.push(`sent-len: ${Math.round(avgWordsPerSentence)}`); }
|