Spaces:
Running
Running
Commit
·
82df431
1
Parent(s):
0e26e9f
Separate the beforeBso/bso struct columns into before_/after_ b, s, o columns and cast them to Int32
Browse files
data.py
CHANGED
|
@@ -6,6 +6,7 @@ from huggingface_hub import snapshot_download
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
from string import ascii_letters
|
|
|
|
| 9 |
|
| 10 |
from convert import (
|
| 11 |
aux_global_id_to_code, presult,
|
|
@@ -47,10 +48,10 @@ for season in tqdm(SEASONS):
|
|
| 47 |
_aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
|
| 48 |
aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
|
| 49 |
|
| 50 |
-
backup_data_df = data_df
|
| 51 |
-
backup_aux_df = aux_df
|
| 52 |
-
backup_sched_df = sched_df
|
| 53 |
-
backup_aux_sched_df = aux_sched_df
|
| 54 |
|
| 55 |
aux_df = (
|
| 56 |
aux_df
|
|
@@ -155,6 +156,9 @@ data_df = (
|
|
| 155 |
(
|
| 156 |
aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches', 'beforeBso', 'bso']]
|
| 157 |
.rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
|
|
|
|
|
|
|
|
|
|
| 158 |
),
|
| 159 |
on='universal_code',
|
| 160 |
how='left'
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
from string import ascii_letters
|
| 9 |
+
from itertools import product
|
| 10 |
|
| 11 |
from convert import (
|
| 12 |
aux_global_id_to_code, presult,
|
|
|
|
| 48 |
_aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
|
| 49 |
aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
|
| 50 |
|
| 51 |
+
# backup_data_df = data_df
|
| 52 |
+
# backup_aux_df = aux_df
|
| 53 |
+
# backup_sched_df = sched_df
|
| 54 |
+
# backup_aux_sched_df = aux_sched_df
|
| 55 |
|
| 56 |
aux_df = (
|
| 57 |
aux_df
|
|
|
|
| 156 |
(
|
| 157 |
aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches', 'beforeBso', 'bso']]
|
| 158 |
.rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
|
| 159 |
+
.with_columns(
|
| 160 |
+
*[pl.col(f'{bso}').struct.field(field).cast(pl.Int32).alias(f'{"after" if bso == "bso" else "before"}_{field}') for bso, field in product(['beforeBso', 'bso'], ['b', 's', 'o'])]
|
| 161 |
+
)
|
| 162 |
),
|
| 163 |
on='universal_code',
|
| 164 |
how='left'
|