Spaces:

openadmet
/

OpenADMET-ExpansionRx-Challenge

Running

Maria Castellanos committed 5 days ago

Commit

718b39d

1 Parent(s): ec86576

Fix eval for dummy

Files changed (2) hide show

evaluate.py CHANGED Viewed

@@ -3,7 +3,7 @@ import pandas as pd
 from pathlib import Path
 from typing import Optional
 from about import (
-    ENDPOINTS, API,
     submissions_repo,
     results_repo_test,
     results_repo_validation,
@@ -341,19 +341,13 @@ def calculate_metrics(
         # calculate metrics with bootstrapping
         bootstrap_df = bootstrap_metrics(y_pred_log, y_true_log, ept, n_bootstrap_samples=1000)
-        df_endpoint = bootstrap_df.pivot_table(
-            index=["Endpoint"],
-            columns="Metric",
-            values="Value",
-            aggfunc=["mean", "std"]
-        ).reset_index()
-        # Get a df with columns 'mean_MAE', 'std_MAE', ...
-        df_endpoint.columns = [
-            f'{i}_{j}' if i != '' else j for i, j in df_endpoint.columns
-        ]
-        df_endpoint.rename(columns={'Endpoint_': 'Endpoint'}, inplace=True)
         all_endpoint_results.append(df_endpoint)
     df_results = pd.concat(all_endpoint_results, ignore_index=True)

 from pathlib import Path
 from typing import Optional
 from about import (
+    ENDPOINTS, API, METRICS,
     submissions_repo,
     results_repo_test,
     results_repo_validation,
         # calculate metrics with bootstrapping
         bootstrap_df = bootstrap_metrics(y_pred_log, y_true_log, ept, n_bootstrap_samples=1000)
+        # Longer alternative to pivot_table for cases where all results for a metric are NaN, since pivot_table drops those columns
+        grouped = bootstrap_df.groupby(["Endpoint", "Metric"])["Value"].agg(["mean", "std"])
+        df_unstacked = grouped.unstack(level="Metric")
+        df_reindexed = df_unstacked.reindex(columns=list(METRICS), level=1)
+        df_reindexed.columns = [f"{agg}_{metric}" for agg, metric in df_reindexed.columns]
+        df_endpoint = df_reindexed.reset_index()
         all_endpoint_results.append(df_endpoint)
     df_results = pd.concat(all_endpoint_results, ignore_index=True)

utils.py CHANGED Viewed

@@ -116,7 +116,7 @@ def metrics_per_ep(pred: np.ndarray,
     else:
         r2 = r2_score(true, pred)
-    if np.nanstd(pred) == 0:
         spr = np.nan
         ktau = np.nan
     else:

     else:
         r2 = r2_score(true, pred)
+    if np.nanstd(pred) < 0.0001:
         spr = np.nan
         ktau = np.nan
     else: