Maria Castellanos committed · Commit 20ed309 · 1 Parent(s): 9638dbd

validations and improvements

Files changed (5)
  1. _static/challenge_logo.png +0 -0
  2. about.py +6 -2
  3. app.py +91 -32
  4. evaluate.py +158 -60
  5. utils.py +15 -0
_static/challenge_logo.png CHANGED
about.py CHANGED
@@ -15,13 +15,17 @@ ENDPOINTS = ["LogD",
 LB_COLS0 = ["endpoint",
             "user",
             "MAE",
+            "RAE",
             "R2",
             "Spearman R",
             "Kendall's Tau",
+            "data coverage (%)",
             "submission_time",
             "model_report"]
-LB_COLS = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]
-LB_DTYPES = ['markdown', 'number', 'number', 'number', 'number', 'str', 'markdown']
+LB_COLS = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details",
+           "data coverage (%)"]
+LB_AVG = ["user", "MA-RAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]  # Delete some columns for overall LB?
+LB_DTYPES = ['markdown', 'number', 'number', 'number', 'number', 'str', 'markdown', 'number']
 
 TOKEN = os.environ.get("HF_TOKEN")
 CACHE_PATH=os.getenv("HF_HOME", ".")
app.py CHANGED
@@ -6,8 +6,7 @@ import pandas as pd
 from evaluate import submit_data, evaluate_data
 from utils import make_tag_clickable, make_user_clickable, fetch_dataset_df
 
-from datetime import datetime
-from about import ENDPOINTS, LB_COLS, LB_DTYPES
+from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES
 
 
 ALL_EPS = ['Average'] + ENDPOINTS
@@ -22,11 +21,19 @@ def build_leaderboard(df_results):
             per_ep[ep] = pd.DataFrame(columns=LB_COLS)  # Empty df
             continue
 
-        # Make user and model details clickable
-        df['user'] = df['user'].apply(lambda x: make_user_clickable(x)).astype(str)
+        # Make user and model details clickable if it's a Hugging Face user
+        df['user'] = df.apply(
+            lambda row: make_user_clickable(row['user']) if not row['anonymous'] else row['user'],
+            axis=1).astype(str)
         df['model details'] = df['model_report'].apply(lambda x: make_tag_clickable(x)).astype(str)
 
-        per_ep[ep] = df[LB_COLS]
+        if ep == "Average":
+            df["MA-RAE"] = df["RAE"]  # The average of the RAE per endpoint
+            sorted_df = df.sort_values(by='MA-RAE', ascending=True, kind="stable")
+            per_ep[ep] = sorted_df[LB_AVG]
+        else:
+            sorted_df = df.sort_values(by="MAE", ascending=True, kind="stable")
+            per_ep[ep] = sorted_df[LB_COLS]
 
     return per_ep
 
@@ -49,38 +56,36 @@ def gradio_interface():
 
     ### Header
     with gr.Row():
-        with gr.Column(scale=8):  # bigger text area
+        with gr.Column(scale=7):  # bigger text area
            gr.Markdown("""
-           ## Welcome to the OpenADMET + XXX Blind Challenge!
+           ## Welcome to the OpenADMET + ExpansionRx Blind Challenge!
            Your task is to develop and submit predictive models for key ADMET properties on a blinded test set of real-world drug discovery data 🧑‍🔬
 
            Go to the **Leaderboard** to check out how the challenge is going.
           To participate, head over to the **Submit** tab and upload your results as a `CSV` file.
           """
           )
-        with gr.Column(scale=1):  # smaller side column for logo
+        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./_static/challenge_logo.png",
                show_label=False,
                show_download_button=False,
-               width="10vw",  # Take up the width of the column (2/8 = 1/4)
+               width="5vw",  # width of the logo column
            )
 
    # --- Welcome markdown message ---
    welcome_md = """
-   # 💊 OpenADMET + XXX
+   # 💊 OpenADMET + ExpansionRx
    ## Computational Blind Challenge in ADMET
 
    This challenge is a community-driven initiative to benchmark predictive models for ADMET properties in drug discovery,
-   hosted by **OpenADMET** in collaboration with **XXX**.
+   hosted by **OpenADMET** in collaboration with **ExpansionRx**.
 
 
    ## Why are ADMET properties important in drug discovery?
    Small molecules continue to be the bricks and mortar of drug discovery globally, accounting for ~75% of FDA approvals over the last decade.
-   Oral bioavailability, easily tunable properties, modulation of a wide range of mechanisms,
-   and ease of manufacturing make small molecules highly attractive as therapeutic agents, a trend that is not expected to drastically change,
-   despite increased interest in biologics. Indeed, newer small molecule modalities such as degraders, molecular glues, and antibody-drug conjugates
-   (to name a few) make understanding small molecule properties more important than ever.
+   Oral bioavailability, easily tunable properties, modulation of a wide range of mechanisms, and ease of manufacturing make small molecules highly attractive as therapeutic agents.
+   Moreover, emerging small molecule modalities such as degraders, expression modulators, molecular glues, and antibody-drug conjugates (to name a few) have vastly expanded what we thought small molecules were capable of.
 
    It is fairly difficult to predict the lifetime and distribution of small molecules within the body. Additionally,
    interaction with off-targets can cause safety issues and toxicity. Collectively these *Absorption*, *Distribution*, *Metabolism*, *Excretion*, *Toxicology*--or **ADMET**--properties
@@ -90,7 +95,17 @@ def gradio_interface():
    that give rise to these properties through integrated structural biology, high throughput experimentation and integrative computational models.
    Read more about our strategy to transform drug discovery on our [website](https://openadmet.org/community/blogs/whatisopenadmet/).
 
-   For this blind challenge we selected ten (10) crucial endpoints for the community to predict:
+   Critical to our mission is developing open datasets and running community blind challenges to assess the current state of the art in ADMET modeling.
+   Building on the success of the recent [ASAP-Polaris-OpenADMET blind challenge](https://chemrxiv.org/engage/chemrxiv/article-details/68ac00d1728bf9025e22fe45) in computational methods for drug discovery,
+   we bring you a brand-new challenge in collaboration with **ExpansionRx**. During a recent series of drug discovery campaigns for RNA-mediated diseases,
+   ExpansionRx collected a variety of ADMET data for off-targets and properties of interest, which they are generously sharing with the community for this challenge.
+
+   ## 🧪 The Challenge
+
+   Participants will be tasked with solving real-world ADMET prediction problems ExpansionRx faced during lead optimization.
+   Specifically, you will be asked to predict the ADMET properties of late-stage molecules based on earlier-stage data from the same campaigns.
+   For this challenge we selected ten (10) crucial endpoints for the community to predict:
+
    - LogD
    - Kinetic Solubility **KSOL**: uM
    - Mouse Liver Microsomal (**MLM**) *CLint*: mL/min/kg
@@ -102,16 +117,18 @@ def gradio_interface():
    - Rat Liver Microsomal (**RLM**) *CLint*: mL/min/kg
    - Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound
 
+   Find more information about these endpoints on our [blog](https://openadmet.org/community/blogs/challenge_announcement2/).
+
    ## ✅ How to Participate
    1. **Register**: Create an account with Hugging Face.
-   2. **Download the Public Dataset**: Clone the XXX dataset [link]
+   2. **Download the Public Dataset**: Clone the ExpansionRx dataset [link]
    3. **Train Your Model**: Use the provided training data for each ADMET property of your choice.
    4. **Submit Predictions**: Follow the instructions in the *Submit* tab to upload your predictions.
-   5. Join the discussion on the [Challenge Discord](link)!
+   5. Join the discussion on the [Challenge Discord](https://discord.gg/MY5cEFHH3D)!
 
    ## 📊 Data:
 
-   The training set will have the following variables:
+   The training set contains the following parameters:
 
    | Column | Unit | data type | Description |
    |:-----------------------------|-----------|-----------|:-------------|
@@ -128,15 +145,28 @@ def gradio_interface():
    | RLM CLint | mL/min/kg | float | Rat Liver Microsomal Stability |
    | MGMB | % Unbound | float | Mouse Gastrocnemius Muscle Binding |
 
-   At test time, we will only provide the Molecule Name and Smiles. Make sure your submission file has the same columns!
+   You can download the training data from the [Hugging Face dataset](https://huggingface.co/datasets/OpenADMET/openadmet-challenge-training-set).
+   The test set will remain blinded until the challenge submission deadline. You will be tasked with predicting the same set of ADMET endpoints for the test set molecules.
 
    ## 📝 Evaluation
-   The challenge will be judged based on the judging criteria outlined here.
-
-   - TBD
+   The challenge will be judged based on the following criteria:
+   - We welcome submissions of any kind, including machine learning and physics-based approaches. You can also employ pre-training approaches as you see fit,
+   as well as incorporate data from external sources into your models and submissions.
+   - In the spirit of open science and open source, we would love to see code showing how you created your submission if possible, in the form of a GitHub repository.
+   If that is not possible due to IP or other constraints, you must at a minimum provide a short written report on your methodology based on the template [here](link to google doc).
+   **Make sure your last submission before the deadline includes a link to a report or to a GitHub repository.**
+   - Each participant can submit as many times as they like, up to a limit of 5 times/day. **Only your latest submission will be considered for the final leaderboard.**
+   - The endpoints will be judged individually by mean absolute error (**MAE**), while the overall leaderboard will be judged by the macro-averaged relative absolute error (**MA-RAE**).
+   - Endpoints that are not already on a log scale (i.e., everything except LogD) will be transformed to log scale to minimize the impact of outliers on evaluation.
+   - We will estimate errors on the metrics using bootstrapping and use the statistical testing workflow outlined in [this paper](https://chemrxiv.org/engage/chemrxiv/article-details/672a91bd7be152b1d01a926b) to determine if model performance is statistically distinct.
 
    📅 **Timeline**:
-   - TBD
+   - **September 12:** Challenge announcement
+   - **September XX:** Sample data release
+   - **October 27:** Challenge starts
+   - **October-November:** Online Q&A sessions and support via the Discord channel
+   - **January 19, 2026:** Submission closes
+   - **January 26, 2026:** Winners announced
 
    ---
 
@@ -166,7 +196,7 @@ def gradio_interface():
        lboard_dict['Average'] = Leaderboard(
            value=build_leaderboard(current_df)['Average'],
            datatype=LB_DTYPES,
-           select_columns=LB_COLS,
+           select_columns=LB_AVG,
            search_columns=["user"],
            render=True,
            every=15,
@@ -207,10 +237,11 @@ def gradio_interface():
        gr.Markdown(
            """
            ## Participant Information
-           To participate, we **only** require a Hugging Face username, which will be displayed on the leaderboard.
-           Other information is optional but helps us track participation.
+           To participate, **we require a Hugging Face username**, which will be used to track multiple submissions.
+           Your username will be displayed on the leaderboard unless you check the *anonymous* box. If you want to remain anonymous, please provide an alias to be used on the leaderboard (we'll keep your username hidden).
+
            If you wish to be included in Challenge discussions, please provide your Discord username and email.
-           If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.
+           If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation (and check the box below).
 
            We also ask you to provide a link to a report describing your method. While not mandatory at the time of participation,
           you need to submit the link before the challenge deadline in order to be considered for the final leaderboard.
@@ -221,7 +252,17 @@ def gradio_interface():
            username_input = gr.Textbox(
                label="Username",
                placeholder="Enter your Hugging Face username",
-               info="This will be displayed on the leaderboard."
+               # info="This will be displayed on the leaderboard."
+           )
+           user_alias = gr.Textbox(
+               label="Optional Alias",
+               placeholder="Enter an identifying alias for the leaderboard if you wish to remain anonymous",
+               # info="This will be displayed on the leaderboard."
+           )
+           anon_checkbox = gr.Checkbox(
+               label="I want to submit anonymously",
+               info="If checked, your alias will be shown instead of your username on the leaderboard.",
+               value=False,
            )
        with gr.Column():
            # Info to track participant, that will not be displayed publicly
@@ -247,6 +288,10 @@ def gradio_interface():
                label="Model Report",
                placeholder="Link to a report describing your method (optional)",
            )
+           paper_checkbox = gr.Checkbox(
+               label="I want to be included in a future publication detailing the Challenge results",
+               value=False,
+           )
 
    with gr.Row():
        with gr.Column():
@@ -256,9 +301,14 @@ def gradio_interface():
            Upload a single CSV file containing your predictions for all ligands in the test set.
            Only your latest submission will be considered.
 
-           You can download the ligand test set here (lik/to/download/smiles/csv).
+           You can download a CSV template with the ligands in the test set here.
            """
            )
+           download_btn = gr.DownloadButton(
+               label="📥 Download Test Set Template",
+               value="data/test_set-example.csv",
+               variant="secondary",
+           )
        with gr.Column():
            predictions_file = gr.File(label="Single file with ADMET predictions (.csv)",
                                       file_types=[".csv"],
@@ -270,12 +320,21 @@ def gradio_interface():
                outputs=user_state
            )
 
-           submit_btn = gr.Button("Submit Predictions")
+           submit_btn = gr.Button("📤 Submit Predictions")
            message = gr.Textbox(label="Status", lines=1, visible=False)
 
            submit_btn.click(
                submit_data,
-               inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation, model_tag],
+               inputs=[predictions_file,
+                       user_state,
+                       participant_name,
+                       discord_username,
+                       email,
+                       affiliation,
+                       model_tag,
+                       user_alias,
+                       anon_checkbox,
+                       paper_checkbox],
                outputs=[message, filename],
            ).success(
                fn=lambda m: gr.update(value=m, visible=True),
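
A minimal sketch of assembling a valid submission, based on the checks this commit adds (a "Molecule Name" column plus one column per entry in about.ENDPOINTS, starting from the downloadable template wired to the download button above); the endpoint names shown and my_model are illustrative placeholders, not part of the commit:

import pandas as pd

# Start from the template so "Molecule Name" (and Smiles) match the blinded test set
template = pd.read_csv("data/test_set-example.csv")
preds = template.copy()
for ep in ["LogD", "KSOL", "MLM CLint"]:               # ...one column per challenge endpoint
    preds[ep] = my_model.predict(preds["Smiles"], ep)  # my_model is a stand-in for your own model
preds.to_csv("my_submission.csv", index=False)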
evaluate.py CHANGED
@@ -1,27 +1,84 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from pathlib import Path
4
- from scipy.stats import spearmanr, kendalltau
5
- from sklearn.metrics import mean_absolute_error, r2_score
6
  from typing import Optional
7
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
 
8
  from huggingface_hub import hf_hub_download
9
  import datetime
10
  import io
11
  import json, tempfile
12
- import pydantic
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- class ParticipantRecord(pydantic.BaseModel):
16
- hf_username: Optional[str] = pydantic.Field(default=None, description="Hugging Face username")
17
- participant_name: Optional[str] = pydantic.Field(default=None, description="Participant's real name")
18
- discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
19
- email: Optional[str] = pydantic.Field(default=None, description="Email address")
20
- affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
21
- model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- class SubmissionMetadata(pydantic.BaseModel):
 
 
 
 
 
 
25
  submission_time_utc: str
26
  user: str
27
  original_filename: str
@@ -29,12 +86,6 @@ class SubmissionMetadata(pydantic.BaseModel):
29
  participant: ParticipantRecord
30
 
31
 
32
- def _safeify_username(username: str) -> str:
33
- return str(username.strip()).replace("/", "_").replace(" ", "_")
34
-
35
- def _unsafify_username(username: str) -> str:
36
- return str(username.strip()).replace("/", "_").replace(" ", "_")
37
-
38
  def submit_data(predictions_file: str,
39
  user_state,
40
  participant_name: str = "",
@@ -42,13 +93,15 @@ def submit_data(predictions_file: str,
42
  email: str = "",
43
  affiliation: str = "",
44
  model_tag: str = "",
 
 
 
45
  ):
46
 
47
  if user_state is None:
48
  raise gr.Error("Username or alias is required for submission.")
49
 
50
  file_path = Path(predictions_file).resolve()
51
-
52
  if not file_path.exists():
53
  raise gr.Error("Uploaded file object does not have a valid file path.")
54
 
@@ -60,29 +113,13 @@ def submit_data(predictions_file: str,
60
 
61
  if results_df.empty:
62
  return gr.Error("The uploaded file is empty.")
63
- if not set(ENDPOINTS).issubset(set(results_df.columns)):
64
- return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
65
 
66
- # TODO, much more validation logic needed depending on the state of final data
67
-
68
- # Build destination filename in the dataset
69
- ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # should keep default time so can be deserialized correctly
70
- safe_user = _safeify_username(user_state)
71
-
72
- destination_csv = f"submissions/{safe_user}_{ts}.csv"
73
- destination_json = destination_csv.replace(".csv", ".json")
74
- # Upload the CSV file
75
- API.upload_file(
76
- path_or_fileobj=str(file_path),
77
- path_in_repo=destination_csv,
78
- repo_id=submissions_repo,
79
- repo_type="dataset",
80
- commit_message=f"Add submission for {safe_user} at {ts}"
81
- )
82
 
83
- # Optional participant record
84
  try:
85
-
86
  participant_record = ParticipantRecord(
87
  hf_username=user_state,
88
  participant_name=participant_name,
@@ -90,11 +127,15 @@ def submit_data(predictions_file: str,
90
  email=email,
91
  affiliation=affiliation,
92
  model_tag=model_tag,
 
 
 
93
  )
94
- except pydantic.ValidationError as e:
95
  return f"❌ Error in participant information: {str(e)}"
96
-
97
-
 
98
  try:
99
  meta = SubmissionMetadata(
100
  submission_time_utc=ts,
@@ -103,11 +144,23 @@ def submit_data(predictions_file: str,
103
  evaluated=False,
104
  participant=participant_record
105
  )
106
- except pydantic.ValidationError as e:
107
- return f"❌ Error in metadata information: {str(e)}"
108
 
109
- meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
 
 
110
 
 
 
 
 
 
 
 
 
 
 
111
  API.upload_file(
112
  path_or_fileobj=meta_bytes,
113
  path_in_repo=destination_json,
@@ -135,7 +188,7 @@ def evaluate_data(filename: str) -> None:
135
  test_path = hf_hub_download(
136
  repo_id=test_repo,
137
  repo_type="dataset",
138
- filename="data/challenge_mock_test_set.csv", #Replace later with "test_dataset.csv" later!!,
139
  )
140
  except Exception as e:
141
  raise gr.Error(f"Failed to download test file: {e}")
@@ -163,13 +216,18 @@ def evaluate_data(filename: str) -> None:
163
  username = meta.participant.hf_username
164
  timestamp = meta.submission_time_utc
165
  report = meta.participant.model_tag
 
 
 
 
166
  except Exception as e:
167
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
168
 
169
  # Write results to results dataset
170
- results_df['user'] = username
171
  results_df['submission_time'] = timestamp
172
  results_df['model_report'] = report
 
173
  safe_user = _unsafify_username(username)
174
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
175
  tmp_name = None
@@ -192,29 +250,69 @@ def calculate_metrics(
192
  results_dataframe: pd.DataFrame,
193
  test_dataframe: pd.DataFrame
194
  ):
 
 
 
195
 
196
- def metrics_per_ep(pred, true):
197
- mae = mean_absolute_error(true, pred)
198
- r2 = r2_score(true, pred)
199
- spr, _ = spearmanr(true, pred)
200
- ktau, _ = kendalltau(true, pred)
201
- return mae, r2, spr, ktau
 
 
202
 
203
- df_results = pd.DataFrame(columns=["endpoint", "MAE", "R2", "Spearman R", "Kendall's Tau"])
204
  for i, measurement in enumerate(ENDPOINTS):
205
- df_pred = results_dataframe[['Molecule Name', measurement]].dropna()
206
- df_true = test_dataframe[['Molecule Name', measurement]].dropna()
207
- # Make sure both have the same order
208
- pred = df_pred.sort_values(by='Molecule Name')[measurement]
209
- true = df_true.sort_values(by='Molecule Name')[measurement]
210
- mae, r2, spearman, ktau = metrics_per_ep(pred, true)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  df_results.loc[i, 'endpoint'] = measurement
212
  df_results.loc[i, 'MAE'] = mae
 
213
  df_results.loc[i, 'R2'] = r2
214
  df_results.loc[i, 'Spearman R'] = spearman
215
  df_results.loc[i, "Kendall's Tau"] = ktau
 
216
 
217
- num_cols = ["MAE", "R2", "Spearman R", "Kendall's Tau"]
 
218
  df_results[num_cols] = df_results[num_cols].apply(pd.to_numeric, errors="coerce")
219
  means = df_results[num_cols].mean()
220
  avg_row = {"endpoint": "Average", **means.to_dict()}
 
1
  import gradio as gr
2
  import pandas as pd
3
  from pathlib import Path
 
 
4
  from typing import Optional
5
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
6
+ from utils import metrics_per_ep
7
  from huggingface_hub import hf_hub_download
8
  import datetime
9
  import io
10
  import json, tempfile
11
+ import re
12
+ from pydantic import (
13
+ BaseModel,
14
+ Field,
15
+ model_validator,
16
+ field_validator,
17
+ ValidationError
18
+ )
19
 
20
+ HF_USERNAME_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-_]{1,38})$")
21
+ def _safeify_username(username: str) -> str:
22
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
23
+
24
+ def _unsafify_username(username: str) -> str:
25
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
26
+
27
+ def _check_required_columns(df: pd.DataFrame, name: str, cols: list[str]):
28
+ missing = [c for c in cols if c not in df.columns]
29
+ if missing:
30
+ raise ValueError(f"{name} is missing required columns: {missing}")
31
 
32
+ class ParticipantRecord(BaseModel):
33
+ hf_username: str = Field(description="Hugging Face username")
34
+ display_name: Optional[str] = Field(description="Name to display on leaderboard")
35
+ participant_name: Optional[str] = Field(default=None, description="Participant's real name")
36
+ discord_username: Optional[str] = Field(default=None, description="Discord username")
37
+ email: Optional[str] = Field(default=None, description="Email address")
38
+ affiliation: Optional[str] = Field(default=None, description="Affiliation")
39
+ model_tag: Optional[str] = Field(default=None, description="Link to model description")
40
+ anonymous: bool = Field(default=False, description="Whether to display username as 'anonymous'")
41
+ consent_publication: bool = Field(default=False, description="Consent to be included in publications")
42
 
43
+ @field_validator("hf_username")
44
+ @classmethod
45
+ def validate_hf_username(cls, v: str) -> str:
46
+ v = v.strip()
47
+ if not HF_USERNAME_RE.match(v):
48
+ raise gr.Error("Invalid Hugging Face username (letters, numbers, -, _; min 2, max ~39).")
49
+ return v
50
+
51
+ @field_validator("display_name")
52
+ @classmethod
53
+ def validate_display_name(cls, v: Optional[str]) -> Optional[str]:
54
+ if v is None:
55
+ return None
56
+ v = v.strip()
57
+ if not v:
58
+ return None
59
+ if len(v) > 20:
60
+ raise ValueError("Display name is too long (max 20 chars).")
61
+ return v
62
+
63
+ @field_validator("model_tag", mode="before")
64
+ @classmethod
65
+ def normalize_url(cls, v):
66
+ if v is None:
67
+ return v
68
+ s = str(v).strip()
69
+ if not s:
70
+ return None
71
+ if "://" not in s:
72
+ s = "https://" + s
73
+ return s
74
 
75
+ @model_validator(mode="after")
76
+ def require_display_name_if_anonymous(self) -> "ParticipantRecord":
77
+ if self.anonymous and not self.display_name:
78
+ raise ValueError("Alias is required when anonymous box is checked.")
79
+ return self
80
+
81
+ class SubmissionMetadata(BaseModel):
82
  submission_time_utc: str
83
  user: str
84
  original_filename: str
 
86
  participant: ParticipantRecord
87
 
88
 
 
 
 
 
 
 
89
  def submit_data(predictions_file: str,
90
  user_state,
91
  participant_name: str = "",
 
93
  email: str = "",
94
  affiliation: str = "",
95
  model_tag: str = "",
96
+ user_display: str = "",
97
+ anon_checkbox: bool = False,
98
+ paper_checkbox: bool = False
99
  ):
100
 
101
  if user_state is None:
102
  raise gr.Error("Username or alias is required for submission.")
103
 
104
  file_path = Path(predictions_file).resolve()
 
105
  if not file_path.exists():
106
  raise gr.Error("Uploaded file object does not have a valid file path.")
107
 
 
113
 
114
  if results_df.empty:
115
  return gr.Error("The uploaded file is empty.")
 
 
116
 
117
+ missing = set(ENDPOINTS) - set(results_df.columns)
118
+ if missing:
119
+ return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ # Save participant record
122
  try:
 
123
  participant_record = ParticipantRecord(
124
  hf_username=user_state,
125
  participant_name=participant_name,
 
127
  email=email,
128
  affiliation=affiliation,
129
  model_tag=model_tag,
130
+ display_name=user_display,
131
+ anonymous=anon_checkbox,
132
+ consent_publication=paper_checkbox
133
  )
134
+ except ValidationError as e:
135
  return f"❌ Error in participant information: {str(e)}"
136
+
137
+ # Build destination filename in the dataset
138
+ ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # should keep default time so can be deserialized correctly
139
  try:
140
  meta = SubmissionMetadata(
141
  submission_time_utc=ts,
 
144
  evaluated=False,
145
  participant=participant_record
146
  )
147
+ except ValidationError as e:
148
+ return f"❌ Error in metadata information: {str(e)}"
149
 
150
+ safe_user = _safeify_username(user_state)
151
+ destination_csv = f"submissions/{safe_user}_{ts}.csv"
152
+ destination_json = destination_csv.replace(".csv", ".json")
153
 
154
+ # Upload the CSV file
155
+ API.upload_file(
156
+ path_or_fileobj=str(file_path),
157
+ path_in_repo=destination_csv,
158
+ repo_id=submissions_repo,
159
+ repo_type="dataset",
160
+ commit_message=f"Add submission for {safe_user} at {ts}"
161
+ )
162
+ # Upload the metadata JSON file
163
+ meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
164
  API.upload_file(
165
  path_or_fileobj=meta_bytes,
166
  path_in_repo=destination_json,
 
188
  test_path = hf_hub_download(
189
  repo_id=test_repo,
190
  repo_type="dataset",
191
+ filename="data/challenge_mock_test_set.csv", #Replace later with "test_dataset.csv",
192
  )
193
  except Exception as e:
194
  raise gr.Error(f"Failed to download test file: {e}")
 
216
  username = meta.participant.hf_username
217
  timestamp = meta.submission_time_utc
218
  report = meta.participant.model_tag
219
+ if meta.participant.anonymous:
220
+ display_name = meta.participant.display_name
221
+ else:
222
+ display_name = username
223
  except Exception as e:
224
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
225
 
226
  # Write results to results dataset
227
+ results_df['user'] = display_name
228
  results_df['submission_time'] = timestamp
229
  results_df['model_report'] = report
230
+ results_df['anonymous'] = meta.participant.anonymous
231
  safe_user = _unsafify_username(username)
232
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
233
  tmp_name = None
 
250
  results_dataframe: pd.DataFrame,
251
  test_dataframe: pd.DataFrame
252
  ):
253
+ import numpy as np
254
+
255
+ # Do some checks
256
 
257
+ # 1) Check all columns are present
258
+ _check_required_columns(results_dataframe, "Results file", ["Molecule Name"] + ENDPOINTS)
259
+ _check_required_columns(test_dataframe, "Test file", ["Molecule Name"] + ENDPOINTS)
260
+ # 2) Check all Molecules in the test set are present in the predictions
261
+ merged_df = pd.merge(test_dataframe, results_dataframe, on=['Molecule Name'], how='left', indicator=True)
262
+ if not (merged_df['_merge'] == 'both').all():
263
+ raise gr.Error("The predictions file is missing some molecules present in the test set. Please ensure all molecules are included.")
264
+ # TODO: What to do when a molecule is duplicated in the Predictions file?
265
 
266
+ df_results = pd.DataFrame(columns=["endpoint", "MAE", "RAE", "R2", "Spearman R", "Kendall's Tau"])
267
  for i, measurement in enumerate(ENDPOINTS):
268
+ df_pred = results_dataframe[['Molecule Name', measurement]].copy()
269
+ df_true = test_dataframe[['Molecule Name', measurement]].copy()
270
+ # coerce numeric columns
271
+ df_pred[measurement] = pd.to_numeric(df_pred[measurement], errors="coerce")
272
+ df_true[measurement] = pd.to_numeric(df_true[measurement], errors="coerce")
273
+
274
+ if df_pred[measurement].isnull().all():
275
+ # TODO: Allow missing endpoints or raise an error?
276
+ raise gr.Error(f"All predictions are missing for endpoint {measurement}. Please provide valid predictions.")
277
+
278
+ # Drop NaNs and calculate coverage
279
+ merged = (
280
+ df_pred.rename(columns={measurement: f"{measurement}_pred"})
281
+ .merge(
282
+ df_true.rename(columns={measurement: f"{measurement}_true"}),
283
+ on="Molecule Name",
284
+ how="inner",
285
+ )
286
+ .dropna(subset=[f"{measurement}_pred", f"{measurement}_true"])
287
+ )
288
+ n_total = merged[f"{measurement}_true"].notna().sum() # Valid test set points
289
+ n_pairs = len(merged) # actual pairs with predictions
290
+ coverage = (n_pairs / n_total * 100.0) if n_total else 0.0
291
+ merged = merged.sort_values("Molecule Name", kind="stable")
292
+
293
+ # validate pairs
294
+ if n_pairs < 10:
295
+ mae = rae = r2 = spearman = ktau = np.nan
296
+ else:
297
+ y_pred = merged[f"{measurement}_pred"].to_numpy()
298
+ y_true = merged[f"{measurement}_true"].to_numpy()
299
+ # Force log scale for all endpoints except LogD (for outliers)
300
+ if measurement != "LogD":
301
+ y_pred = np.log10(y_pred)
302
+ y_true = np.log10(y_true)
303
+ mae, rae, r2, spearman, ktau = metrics_per_ep(y_pred, y_true)
304
+
305
+
306
  df_results.loc[i, 'endpoint'] = measurement
307
  df_results.loc[i, 'MAE'] = mae
308
+ df_results.loc[i, 'RAE'] = rae
309
  df_results.loc[i, 'R2'] = r2
310
  df_results.loc[i, 'Spearman R'] = spearman
311
  df_results.loc[i, "Kendall's Tau"] = ktau
312
+ df_results.loc[i, 'data coverage (%)'] = coverage
313
 
314
+ # Average results
315
+ num_cols = ["MAE", "RAE", "R2", "Spearman R", "Kendall's Tau", "data coverage (%)"]
316
  df_results[num_cols] = df_results[num_cols].apply(pd.to_numeric, errors="coerce")
317
  means = df_results[num_cols].mean()
318
  avg_row = {"endpoint": "Average", **means.to_dict()}
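
The evaluation criteria above mention bootstrapped error bars on the metrics, which this commit does not yet implement; a minimal sketch of what that could look like for one endpoint's MAE, assuming the paired y_true/y_pred arrays produced in calculate_metrics:

import numpy as np

def bootstrap_mae_ci(y_true, y_pred, n_boot=1000, alpha=0.05, seed=0):
    # Resample prediction/measurement pairs with replacement and return
    # a (1 - alpha) percentile confidence interval for the MAE.
    rng = np.random.default_rng(seed)
    n = len(y_true)
    maes = np.empty(n_boot)
    for b in range(n_boot):
        idx = rng.integers(0, n, size=n)
        maes[b] = np.mean(np.abs(y_true[idx] - y_pred[idx]))
    return np.quantile(maes, [alpha / 2, 1 - alpha / 2])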
utils.py CHANGED
@@ -1,5 +1,6 @@
 
 import pandas as pd
+import numpy as np
 from datasets import load_dataset
 from about import results_repo
 from about import LB_COLS0
@@ -31,3 +32,17 @@ def fetch_dataset_df():
    )
    latest.rename(columns={"submission_time": "submission time"}, inplace=True)
    return latest
+
+def metrics_per_ep(pred, true):
+    from scipy.stats import spearmanr, kendalltau
+    from sklearn.metrics import mean_absolute_error, r2_score
+    mae = mean_absolute_error(true, pred)
+    rae = mae / np.mean(np.abs(true - np.mean(true)))  # MAE relative to an always-predict-the-mean baseline
+    if np.nanstd(true) == 0:
+        r2 = np.nan
+    else:
+        r2 = r2_score(true, pred)
+    spr, _ = spearmanr(true, pred)
+    ktau, _ = kendalltau(true, pred)
+
+    return mae, rae, r2, spr, ktau
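
The RAE added here divides the model's MAE by the MAE of a constant predictor that always outputs the mean of the true values, so RAE < 1 means the model beats that baseline, and the overall leaderboard's MA-RAE is this ratio averaged over endpoints. A quick sanity check (illustrative, not part of the commit): the mean predictor itself lands at exactly 1.0.

import numpy as np
from utils import metrics_per_ep

true = np.array([1.0, 2.0, 3.0, 4.0])
mean_pred = np.full_like(true, true.mean())  # baseline: always predict the mean
mae, rae, r2, spr, ktau = metrics_per_ep(mean_pred, true)
print(mae, rae)  # 1.0 1.0 -> RAE of the mean predictor is 1 by construction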