Update eval_utils.py
eval_utils.py  +4 -3

@@ -253,11 +253,13 @@ def evaluate_summ(gold_data, pred_data):
         gold_summaries.append(gold_summary)
         pred_summaries.append(pred_summary)
 
+
     rl_evaluator = rouge.Rouge(metrics=['rouge-n','rouge-l'], max_n=2, limit_length=False, apply_avg=True)
     rl_scores = rl_evaluator.get_scores(pred_summaries, gold_summaries)
-
+    print("Rouge:", {k:v['f'] for k,v in rl_scores.items()}, flush=True)
+
     _, _, bs = bert_score.score(pred_summaries, gold_summaries, lang="en", verbose=True, device='cuda')
-    print("
+    print("BERTSCORE:", bs.mean().item())
     return {'ROUGE': rl_scores['rouge-l']['f'], 'BERTSCORE': bs.mean().item()}
 
 
@@ -420,7 +422,6 @@ def get_evaluation_scores(gold_data, submission_data):
     evaluation_results = {}
 
     for task in submission_data.keys():
-        if task != 'summ': continue
         print(f"Task: {task}")
 
         if task == "bail":
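For reference, the two metrics this patch now prints can be reproduced standalone. The sketch below mirrors the patched evaluate_summ body under the assumption that the rouge module is the py-rouge package (its Rouge constructor matches the arguments used above) and bert_score is the bert-score package; the toy gold/pred summary lists are invented for illustration, and the CUDA device is made optional.

import rouge        # assumed: py-rouge package (pip install py-rouge)
import bert_score   # bert-score package (pip install bert-score)
import torch

# Toy data, for illustration only -- real inputs come from gold_data / pred_data.
gold_summaries = ["The court granted bail to the appellant subject to conditions."]
pred_summaries = ["Bail was granted to the appellant by the court with conditions."]

# ROUGE-1, ROUGE-2 and ROUGE-L F-scores, averaged over the corpus, no length limit.
rl_evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2,
                           limit_length=False, apply_avg=True)
rl_scores = rl_evaluator.get_scores(pred_summaries, gold_summaries)
print("Rouge:", {k: v['f'] for k, v in rl_scores.items()}, flush=True)

# BERTScore F1, averaged over all prediction/reference pairs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # the patch hard-codes 'cuda'
_, _, bs = bert_score.score(pred_summaries, gold_summaries, lang="en",
                            verbose=True, device=device)
print("BERTSCORE:", bs.mean().item())

# Same dict the patched function returns.
print({'ROUGE': rl_scores['rouge-l']['f'], 'BERTSCORE': bs.mean().item()})

Note that py-rouge's get_scores takes hypotheses first and references second, which is the order (pred_summaries, gold_summaries) used in the patch.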