Spaces:
Running
Running
Commit
·
0ed953a
1
Parent(s):
a8b6a3f
Add whiff, csw, swing
Browse files
- convert.py +1 -1
- data.py +15 -5
convert.py
CHANGED
|
@@ -95,7 +95,7 @@ presult = {
|
|
| 95 |
122: 'Catcher interference',
|
| 96 |
123: 'Uncaught third strike',
|
| 97 |
124: 'Sacrifice hit error',
|
| 98 |
-
125: 'Sacrifice fly',
|
| 99 |
126: "Fielder's choice",
|
| 100 |
128: "Sacrifice fielder's choice",
|
| 101 |
129: 'Bunt strikeout',
|
|
|
|
| 95 |
122: 'Catcher interference',
|
| 96 |
123: 'Uncaught third strike',
|
| 97 |
124: 'Sacrifice hit error',
|
| 98 |
+
125: 'Sacrifice fly error',
|
| 99 |
126: "Fielder's choice",
|
| 100 |
128: "Sacrifice fielder's choice",
|
| 101 |
129: 'Bunt strikeout',
|
data.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import polars as pl
|
| 2 |
-
from glob import glob
|
| 3 |
import os
|
| 4 |
from tqdm.auto import tqdm
|
| 5 |
|
|
@@ -8,6 +7,7 @@ from convert import aux_global_id_to_code, presult, ball_kind, ball_kind_code, l
|
|
| 8 |
DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
|
| 9 |
# SEASONS = list(range(2021, 2025+1))
|
| 10 |
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
|
|
|
| 11 |
|
| 12 |
data_df = pl.DataFrame()
|
| 13 |
text_df = pl.DataFrame()
|
|
@@ -86,8 +86,6 @@ aux_df = (
|
|
| 86 |
)
|
| 87 |
)
|
| 88 |
|
| 89 |
-
data_df = data_df
|
| 90 |
-
|
| 91 |
data_df = (
|
| 92 |
data_df
|
| 93 |
.with_columns(
|
|
@@ -153,16 +151,28 @@ data_df = (
|
|
| 153 |
|
| 154 |
pl.col('x').add(-100).mul(-1),
|
| 155 |
pl.col('y').neg().add(250),
|
| 156 |
-
pl.col('presult').replace_strict(presult),
|
| 157 |
pl.col('ballKind').replace_strict(ball_kind),
|
| 158 |
pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
|
| 159 |
pl.col('batLR').replace_strict(lr),
|
|
|
|
| 160 |
|
| 161 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
| 162 |
.then(pl.lit('Regular Season'))
|
| 163 |
.when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
|
| 164 |
.then(pl.lit('Postseason'))
|
| 165 |
.otherwise('GameKindName')
|
| 166 |
-
.alias('coarse_game_kind')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
)
|
| 168 |
)
|
|
|
|
| 1 |
import polars as pl
|
|
|
|
| 2 |
import os
|
| 3 |
from tqdm.auto import tqdm
|
| 4 |
|
|
|
|
| 7 |
DATA_PATH = os.path.expanduser('~/Documents/npb_data_collector/npb')
|
| 8 |
# SEASONS = list(range(2021, 2025+1))
|
| 9 |
SEASONS = [2021, 2022, 2023, 2024, 2025]
|
| 10 |
+
# SEASONS = [2024]
|
| 11 |
|
| 12 |
data_df = pl.DataFrame()
|
| 13 |
text_df = pl.DataFrame()
|
|
|
|
| 86 |
)
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
| 89 |
data_df = (
|
| 90 |
data_df
|
| 91 |
.with_columns(
|
|
|
|
| 151 |
|
| 152 |
pl.col('x').add(-100).mul(-1),
|
| 153 |
pl.col('y').neg().add(250),
|
| 154 |
+
pl.col('presult').alias('presult_id'),
|
| 155 |
pl.col('ballKind').replace_strict(ball_kind),
|
| 156 |
pl.col('ballKind').replace_strict(ball_kind_code).alias('ballKind_code'),
|
| 157 |
pl.col('batLR').replace_strict(lr),
|
| 158 |
+
pl.col('date').str.to_date('%Y%m%d'),
|
| 159 |
|
| 160 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
| 161 |
.then(pl.lit('Regular Season'))
|
| 162 |
.when(~pl.col('GameKindName').is_in(['Spring Training', 'All-Star Game']))
|
| 163 |
.then(pl.lit('Postseason'))
|
| 164 |
.otherwise('GameKindName')
|
| 165 |
+
.alias('coarse_game_kind'),
|
| 166 |
+
)
|
| 167 |
+
.with_columns(
|
| 168 |
+
pl.col('presult_id').replace_strict(presult).alias('presult')
|
| 169 |
+
)
|
| 170 |
+
.with_columns(
|
| 171 |
+
pl.col('presult').is_in(['None', 'Balk', 'Batter interference', 'Catcher interference', 'Pitcher delay', 'Intentional walk', 'Unknown']).not_().alias('pitch'),
|
| 172 |
+
pl.col('presult').is_in(['Swinging strike', 'Swinging strikeout']).alias('whiff'),
|
| 173 |
+
)
|
| 174 |
+
.with_columns(
|
| 175 |
+
(pl.col('pitch') & pl.col('presult').is_in(['Hit by pitch', 'Sacrifice bunt', 'Sacrifice fly', 'Looking strike', 'Ball', 'Walk', 'Looking strikeout', 'Sacrifice hit error', 'Sacrifice fly error', "Sacrifice fielder's choice", 'Bunt strikeout']).not_()).alias('swing'),
|
| 176 |
+
(pl.col('whiff') | pl.col('presult').is_in(['Looking strike', 'Uncaught third strike', 'Looking strikeout'])).alias('csw')
|
| 177 |
)
|
| 178 |
)
|