Spaces:
Runtime error
Runtime error
fix: examples
Browse files
app.py
CHANGED
|
@@ -53,7 +53,8 @@ EXAMPLES = [
|
|
| 53 |
"user_input": "How does metformin work to treat diabetes?",
|
| 54 |
"retrieved_context": "Metformin reduces hepatic glucose production, decreases intestinal glucose absorption, and improves insulin sensitivity by increasing peripheral glucose uptake.",
|
| 55 |
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism of action accurately and completely?",
|
| 56 |
-
"rubric": "
|
|
|
|
| 57 |
},
|
| 58 |
{
|
| 59 |
"emoji": "π",
|
|
@@ -61,7 +62,8 @@ EXAMPLES = [
|
|
| 61 |
"user_input": "What is a bull market?",
|
| 62 |
"gold_answer": "A bull market is a financial market condition where prices are rising or expected to rise, typically defined by a 20% rise from recent lows.",
|
| 63 |
"pass_criteria": "Does the MODEL OUTPUT provide a complete and accurate definition?",
|
| 64 |
-
"rubric": "1. Incorrect or misleading\n2. Basic but incomplete\n3. Accurate but missing technical details\n4. Complete with technical specifics\n5. Comprehensive with market context"
|
|
|
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"emoji": "π«",
|
|
@@ -69,14 +71,17 @@ EXAMPLES = [
|
|
| 69 |
"user_input": "What are the diagnostic criteria for hypertension?",
|
| 70 |
"retrieved_context": "Stage 1 hypertension: systolic 130-139 or diastolic 80-89 mmHg. Stage 2: systolic ≥140 or diastolic ≥90 mmHg.",
|
| 71 |
"pass_criteria": "Does the MODEL OUTPUT accurately reflect current diagnostic guidelines?",
|
| 72 |
-
"rubric": "1. Incorrect values\n2. Partially correct but imprecise\n3. Correct but missing staging\n4. Complete with staging information\n5. Comprehensive with risk factors"
|
|
|
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"emoji": "π°",
|
| 76 |
"model_output": "ETFs are investment funds traded on stock exchanges, offering diversification and lower fees than mutual funds.",
|
| 77 |
"user_input": "What are ETFs and their advantages?",
|
| 78 |
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and benefits accurately?",
|
| 79 |
-
"rubric": "
|
|
|
|
|
|
|
| 80 |
},
|
| 81 |
{
|
| 82 |
"emoji": "π₯",
|
|
@@ -84,7 +89,8 @@ EXAMPLES = [
|
|
| 84 |
"user_input": "What is MRSA?",
|
| 85 |
"retrieved_context": "MRSA (Methicillin-resistant Staphylococcus aureus) is a bacteria resistant to many antibiotics. It can cause skin infections, pneumonia, and bloodstream infections.",
|
| 86 |
"pass_criteria": "Does the MODEL OUTPUT explain both resistance and clinical significance?",
|
| 87 |
-
"rubric": "1. Incorrect information\n2. Only mentions resistance\n3. Correct but incomplete clinical picture\n4. Complete with resistance and clinical aspects\n5. Comprehensive with treatment options"
|
|
|
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"emoji": "π",
|
|
@@ -92,7 +98,8 @@ EXAMPLES = [
|
|
| 92 |
"user_input": "What is diversification in investing?",
|
| 93 |
"gold_answer": "Diversification is a risk management strategy that mixes various investments within a portfolio to reduce exposure to any single asset or risk.",
|
| 94 |
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and purpose of diversification?",
|
| 95 |
-
"rubric": "
|
|
|
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"emoji": "🧬",
|
|
@@ -100,14 +107,17 @@ EXAMPLES = [
|
|
| 100 |
"user_input": "What causes Type 2 diabetes?",
|
| 101 |
"retrieved_context": "Type 2 diabetes develops when the body becomes resistant to insulin or the pancreas doesn't produce enough insulin. Risk factors include obesity, physical inactivity, and genetics.",
|
| 102 |
"pass_criteria": "Does the MODEL OUTPUT explain both pathophysiology and risk factors?",
|
| 103 |
-
"rubric": "1. Incorrect pathophysiology\n2. Basic mechanism only\n3. Correct mechanism with partial risk factors\n4. Complete with risk factors\n5. Comprehensive with prevention strategies"
|
|
|
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"emoji": "π΅",
|
| 107 |
"model_output": "A mortgage amortization schedule shows monthly payments divided between principal and interest over the loan term.",
|
| 108 |
"user_input": "What is mortgage amortization?",
|
| 109 |
"pass_criteria": "Does the MODEL OUTPUT explain the concept and components clearly?",
|
| 110 |
-
"rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Explains components without context\n4. Complete with payment breakdown\n5. Comprehensive with practical implications"
|
|
|
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"emoji": "π¬",
|
|
@@ -115,7 +125,8 @@ EXAMPLES = [
|
|
| 115 |
"user_input": "How do statins lower cholesterol?",
|
| 116 |
"retrieved_context": "Statins block HMG-CoA reductase enzyme, reducing liver cholesterol production and increasing LDL receptor expression, leading to lower blood cholesterol.",
|
| 117 |
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism accurately?",
|
| 118 |
-
"rubric": "
|
|
|
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"emoji": "π",
|
|
@@ -123,7 +134,8 @@ EXAMPLES = [
|
|
| 123 |
"user_input": "What defines a bear market?",
|
| 124 |
"gold_answer": "A bear market is defined by a prolonged drop in investment prices, typically a 20% or more decline from recent highs, accompanied by widespread pessimism.",
|
| 125 |
"pass_criteria": "Does the MODEL OUTPUT provide technical criteria and market sentiment?",
|
| 126 |
-
"rubric": "1. Incorrect definition\n2. Technical criteria only\n3. Correct with partial context\n4. Complete with market sentiment\n5. Comprehensive with historical context"
|
|
|
|
| 127 |
}
|
| 128 |
]
|
| 129 |
|
|
|
|
| 53 |
"user_input": "How does metformin work to treat diabetes?",
|
| 54 |
"retrieved_context": "Metformin reduces hepatic glucose production, decreases intestinal glucose absorption, and improves insulin sensitivity by increasing peripheral glucose uptake.",
|
| 55 |
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism of action accurately and completely?",
|
| 56 |
+
"rubric": "0. Incorrect or incomplete\n1. Fully correct and comprehensive",
|
| 57 |
+
"gold_answer": ""
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"emoji": "π",
|
|
|
|
| 62 |
"user_input": "What is a bull market?",
|
| 63 |
"gold_answer": "A bull market is a financial market condition where prices are rising or expected to rise, typically defined by a 20% rise from recent lows.",
|
| 64 |
"pass_criteria": "Does the MODEL OUTPUT provide a complete and accurate definition?",
|
| 65 |
+
"rubric": "1. Incorrect or misleading\n2. Basic but incomplete\n3. Accurate but missing technical details\n4. Complete with technical specifics\n5. Comprehensive with market context",
|
| 66 |
+
"retrieved_context": ""
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"emoji": "π«",
|
|
|
|
| 71 |
"user_input": "What are the diagnostic criteria for hypertension?",
|
| 72 |
"retrieved_context": "Stage 1 hypertension: systolic 130-139 or diastolic 80-89 mmHg. Stage 2: systolic ≥140 or diastolic ≥90 mmHg.",
|
| 73 |
"pass_criteria": "Does the MODEL OUTPUT accurately reflect current diagnostic guidelines?",
|
| 74 |
+
"rubric": "1. Incorrect values\n2. Partially correct but imprecise\n3. Correct but missing staging\n4. Complete with staging information\n5. Comprehensive with risk factors",
|
| 75 |
+
"gold_answer": ""
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"emoji": "π°",
|
| 79 |
"model_output": "ETFs are investment funds traded on stock exchanges, offering diversification and lower fees than mutual funds.",
|
| 80 |
"user_input": "What are ETFs and their advantages?",
|
| 81 |
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and benefits accurately?",
|
| 82 |
+
"rubric": "0. Incorrect or incomplete explanation\n1. Correct with complete benefits",
|
| 83 |
+
"retrieved_context": "",
|
| 84 |
+
"gold_answer": ""
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"emoji": "π₯",
|
|
|
|
| 89 |
"user_input": "What is MRSA?",
|
| 90 |
"retrieved_context": "MRSA (Methicillin-resistant Staphylococcus aureus) is a bacteria resistant to many antibiotics. It can cause skin infections, pneumonia, and bloodstream infections.",
|
| 91 |
"pass_criteria": "Does the MODEL OUTPUT explain both resistance and clinical significance?",
|
| 92 |
+
"rubric": "1. Incorrect information\n2. Only mentions resistance\n3. Correct but incomplete clinical picture\n4. Complete with resistance and clinical aspects\n5. Comprehensive with treatment options",
|
| 93 |
+
"gold_answer": ""
|
| 94 |
},
|
| 95 |
{
|
| 96 |
"emoji": "π",
|
|
|
|
| 98 |
"user_input": "What is diversification in investing?",
|
| 99 |
"gold_answer": "Diversification is a risk management strategy that mixes various investments within a portfolio to reduce exposure to any single asset or risk.",
|
| 100 |
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and purpose of diversification?",
|
| 101 |
+
"rubric": "0. Incorrect or incomplete\n1. Correct and comprehensive",
|
| 102 |
+
"retrieved_context": ""
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"emoji": "🧬",
|
|
|
|
| 107 |
"user_input": "What causes Type 2 diabetes?",
|
| 108 |
"retrieved_context": "Type 2 diabetes develops when the body becomes resistant to insulin or the pancreas doesn't produce enough insulin. Risk factors include obesity, physical inactivity, and genetics.",
|
| 109 |
"pass_criteria": "Does the MODEL OUTPUT explain both pathophysiology and risk factors?",
|
| 110 |
+
"rubric": "1. Incorrect pathophysiology\n2. Basic mechanism only\n3. Correct mechanism with partial risk factors\n4. Complete with risk factors\n5. Comprehensive with prevention strategies",
|
| 111 |
+
"gold_answer": ""
|
| 112 |
},
|
| 113 |
{
|
| 114 |
"emoji": "π΅",
|
| 115 |
"model_output": "A mortgage amortization schedule shows monthly payments divided between principal and interest over the loan term.",
|
| 116 |
"user_input": "What is mortgage amortization?",
|
| 117 |
"pass_criteria": "Does the MODEL OUTPUT explain the concept and components clearly?",
|
| 118 |
+
"rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Explains components without context\n4. Complete with payment breakdown\n5. Comprehensive with practical implications",
|
| 119 |
+
"retrieved_context": "",
|
| 120 |
+
"gold_answer": ""
|
| 121 |
},
|
| 122 |
{
|
| 123 |
"emoji": "π¬",
|
|
|
|
| 125 |
"user_input": "How do statins lower cholesterol?",
|
| 126 |
"retrieved_context": "Statins block HMG-CoA reductase enzyme, reducing liver cholesterol production and increasing LDL receptor expression, leading to lower blood cholesterol.",
|
| 127 |
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism accurately?",
|
| 128 |
+
"rubric": "0. Incorrect or incomplete mechanism\n1. Correct and complete explanation",
|
| 129 |
+
"gold_answer": ""
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"emoji": "π",
|
|
|
|
| 134 |
"user_input": "What defines a bear market?",
|
| 135 |
"gold_answer": "A bear market is defined by a prolonged drop in investment prices, typically a 20% or more decline from recent highs, accompanied by widespread pessimism.",
|
| 136 |
"pass_criteria": "Does the MODEL OUTPUT provide technical criteria and market sentiment?",
|
| 137 |
+
"rubric": "1. Incorrect definition\n2. Technical criteria only\n3. Correct with partial context\n4. Complete with market sentiment\n5. Comprehensive with historical context",
|
| 138 |
+
"retrieved_context": ""
|
| 139 |
}
|
| 140 |
]
|
| 141 |
|