Spaces:
Running
Running
Commit
·
c5755f5
1
Parent(s):
fc97909
Enforce black font color in percentile cells and change # pinned columns to 1
Browse files
- data.py +5 -0
- pitch_leaderboard.py +8 -7
- player_team_leaderboard.py +8 -6
- stats.py +71 -33
data.py
CHANGED
|
@@ -47,6 +47,11 @@ for season in tqdm(SEASONS):
|
|
| 47 |
_aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
|
| 48 |
aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
aux_df = (
|
| 51 |
aux_df
|
| 52 |
.filter(pl.col('type') != 'RUNNER')
|
|
|
|
| 47 |
_aux_sched_df = pl.read_parquet(os.path.join(DATA_PATH, str(season), 'aux_schedule.parquet'))
|
| 48 |
aux_sched_df = pl.concat((aux_sched_df, _aux_sched_df))
|
| 49 |
|
| 50 |
+
# Keep a second set of names bound to the frames as they are at this point.
# aux_df is rebound by the filtering that follows; presumably the other frames
# are reworked later too (not visible here) - TODO confirm.
backup_data_df, backup_aux_df, backup_sched_df, backup_aux_sched_df = (
    data_df,
    aux_df,
    sched_df,
    aux_sched_df,
)
|
| 54 |
+
|
| 55 |
aux_df = (
|
| 56 |
aux_df
|
| 57 |
.filter(pl.col('type') != 'RUNNER')
|
pitch_leaderboard.py
CHANGED
|
@@ -82,10 +82,10 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B
|
|
| 82 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
| 83 |
styling_row = []
|
| 84 |
for col, item in zip(pitch_stats[COLUMNS].columns, row):
|
| 85 |
-
_styling = 'font-size: 0.75em; '
|
| 86 |
if col in STATS_WITH_PCTLS:
|
| 87 |
r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 88 |
-
styling_row.append(f'background-color: rgba({r}, {g}, {b})')
|
| 89 |
elif col == 'Team':
|
| 90 |
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
|
| 91 |
elif col in ['Pitch', 'Pitch (General)']:
|
|
@@ -141,13 +141,14 @@ def create_pitch_leaderboard():
|
|
| 141 |
all_teams = gr.Button('Select/Deselect all teams')
|
| 142 |
|
| 143 |
search = gr.Button('Search')
|
| 144 |
-
pin_columns = gr.Button('Pin columns')
|
|
|
|
| 145 |
leaderboard = gr.DataFrame(
|
| 146 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 147 |
column_widths=[125, 75, 60, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
|
| 148 |
show_copy_button=True,
|
| 149 |
show_search='filter',
|
| 150 |
-
pinned_columns=
|
| 151 |
elem_id='leaderboard'
|
| 152 |
)
|
| 153 |
|
|
@@ -161,9 +162,9 @@ def create_pitch_leaderboard():
|
|
| 161 |
# inputs=pin_columns,
|
| 162 |
# outputs=leaderboard
|
| 163 |
# )
|
| 164 |
-
pin_columns.
|
| 165 |
-
lambda : gr.
|
| 166 |
-
|
| 167 |
outputs=leaderboard
|
| 168 |
)
|
| 169 |
|
|
|
|
| 82 |
for i, row in enumerate(pitch_stats[COLUMNS].iter_rows()):
|
| 83 |
styling_row = []
|
| 84 |
for col, item in zip(pitch_stats[COLUMNS].columns, row):
|
| 85 |
+
# _styling = 'font-size: 0.75em; '
|
| 86 |
if col in STATS_WITH_PCTLS:
|
| 87 |
r, g, b = (stat_cmap([pitch_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 88 |
+
styling_row.append(f'color: black; background-color: rgba({r}, {g}, {b})')
|
| 89 |
elif col == 'Team':
|
| 90 |
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
|
| 91 |
elif col in ['Pitch', 'Pitch (General)']:
|
|
|
|
| 141 |
all_teams = gr.Button('Select/Deselect all teams')
|
| 142 |
|
| 143 |
search = gr.Button('Search')
|
| 144 |
+
# pin_columns = gr.Button('Pin columns')
|
| 145 |
+
pin_columns = gr.Checkbox(True, label='Pin columns')
|
| 146 |
leaderboard = gr.DataFrame(
|
| 147 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 148 |
column_widths=[125, 75, 60, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
|
| 149 |
show_copy_button=True,
|
| 150 |
show_search='filter',
|
| 151 |
+
pinned_columns=1,
|
| 152 |
elem_id='leaderboard'
|
| 153 |
)
|
| 154 |
|
|
|
|
| 162 |
# inputs=pin_columns,
|
| 163 |
# outputs=leaderboard
|
| 164 |
# )
|
| 165 |
+
pin_columns.input(
|
| 166 |
+
lambda pin: gr.DataFrame(pinned_columns=1 if pin else None),
|
| 167 |
+
inputs=pin_columns,
|
| 168 |
outputs=leaderboard
|
| 169 |
)
|
| 170 |
|
player_team_leaderboard.py
CHANGED
|
@@ -64,8 +64,8 @@ def create_player_team_leaderboard_app(player_team_type):
|
|
| 64 |
cols = [col for col in cols if col not in ('Pitcher', 'Throws')]
|
| 65 |
else:
|
| 66 |
pct_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 67 |
-
stats_with_pctls = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 68 |
-
cols = ['Batter', 'Team', 'Bats', 'PA', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 69 |
if team:
|
| 70 |
cols = [col for col in cols if col not in ('Batter', 'Bats')]
|
| 71 |
|
|
@@ -123,10 +123,10 @@ def create_player_team_leaderboard_app(player_team_type):
|
|
| 123 |
for i, row in enumerate(pitcher_stats[cols].iter_rows()):
|
| 124 |
styling_row = []
|
| 125 |
for col, item in zip(pitcher_stats[cols].columns, row):
|
| 126 |
-
_styling = 'font-size: 0.75em; '
|
| 127 |
if col in stats_with_pctls:
|
| 128 |
r, g, b = (stat_cmap([pitcher_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 129 |
-
styling_row.append(f'background-color: rgba({r}, {g}, {b})')
|
| 130 |
elif col == 'Team':
|
| 131 |
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
|
| 132 |
else:
|
|
@@ -139,6 +139,8 @@ def create_player_team_leaderboard_app(player_team_type):
|
|
| 139 |
for col, item in zip(cols, row):
|
| 140 |
if col in pct_stats:
|
| 141 |
display_value_row.append(f'{item:.1f}%')
|
|
|
|
|
|
|
| 142 |
elif isinstance(item, float):
|
| 143 |
display_value_row.append(f'{item:.1f}')
|
| 144 |
else:
|
|
@@ -187,7 +189,7 @@ def create_player_team_leaderboard_app(player_team_type):
|
|
| 187 |
column_widths=[get_col_width(col, player_team_type) for col in cols],
|
| 188 |
show_copy_button=True,
|
| 189 |
show_search='filter',
|
| 190 |
-
pinned_columns=
|
| 191 |
elem_id='leaderboard'
|
| 192 |
)
|
| 193 |
|
|
@@ -198,7 +200,7 @@ def create_player_team_leaderboard_app(player_team_type):
|
|
| 198 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 199 |
qualified.change(lambda qualified: gr.Number(interactive=not qualified), inputs=qualified, outputs=min_ip)
|
| 200 |
pin_columns.input(
|
| 201 |
-
lambda pin: gr.DataFrame(pinned_columns=
|
| 202 |
inputs=pin_columns,
|
| 203 |
outputs=leaderboard
|
| 204 |
)
|
|
|
|
| 64 |
cols = [col for col in cols if col not in ('Pitcher', 'Throws')]
|
| 65 |
else:
|
| 66 |
pct_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 67 |
+
stats_with_pctls = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%'] # , 'OBP']
|
| 68 |
+
cols = ['Batter', 'Team', 'Bats', 'PA', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']#, 'AB', 'H', 'BB', 'HBP', 'SF', 'OBP']
|
| 69 |
if team:
|
| 70 |
cols = [col for col in cols if col not in ('Batter', 'Bats')]
|
| 71 |
|
|
|
|
| 123 |
for i, row in enumerate(pitcher_stats[cols].iter_rows()):
|
| 124 |
styling_row = []
|
| 125 |
for col, item in zip(pitcher_stats[cols].columns, row):
|
| 126 |
+
# _styling = 'font-size: 0.75em; '
|
| 127 |
if col in stats_with_pctls:
|
| 128 |
r, g, b = (stat_cmap([pitcher_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
|
| 129 |
+
styling_row.append(f'color: black; background-color: rgba({r}, {g}, {b})')
|
| 130 |
elif col == 'Team':
|
| 131 |
styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
|
| 132 |
else:
|
|
|
|
| 139 |
for col, item in zip(cols, row):
|
| 140 |
if col in pct_stats:
|
| 141 |
display_value_row.append(f'{item:.1f}%')
|
| 142 |
+
elif col in ['OBP']:
|
| 143 |
+
display_value_row.append(f'{item:.3f}')
|
| 144 |
elif isinstance(item, float):
|
| 145 |
display_value_row.append(f'{item:.1f}')
|
| 146 |
else:
|
|
|
|
| 189 |
column_widths=[get_col_width(col, player_team_type) for col in cols],
|
| 190 |
show_copy_button=True,
|
| 191 |
show_search='filter',
|
| 192 |
+
pinned_columns=1,
|
| 193 |
elem_id='leaderboard'
|
| 194 |
)
|
| 195 |
|
|
|
|
| 200 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 201 |
qualified.change(lambda qualified: gr.Number(interactive=not qualified), inputs=qualified, outputs=min_ip)
|
| 202 |
pin_columns.input(
|
| 203 |
+
lambda pin: gr.DataFrame(pinned_columns=1 if pin else None),
|
| 204 |
inputs=pin_columns,
|
| 205 |
outputs=leaderboard
|
| 206 |
)
|
stats.py
CHANGED
|
@@ -8,6 +8,30 @@ from convert import verify_and_return_presult
|
|
| 8 |
|
| 9 |
valid_pitch = pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 13 |
if start_date is not None:
|
|
@@ -105,21 +129,21 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
|
|
| 105 |
pl.when(pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)).then('mph').mean().round(1).alias('Avg MPH'),
|
| 106 |
pl.col('mph').max().alias('Max MPH'),
|
| 107 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
)
|
| 124 |
.with_columns(
|
| 125 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
@@ -170,6 +194,14 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
|
|
| 170 |
# pl.col('inning_code').unique().len().over(over_col).alias('IP'),
|
| 171 |
(pl.col('bso').struct.field('o').cast(pl.Int32) - pl.col('beforeBso').struct.field('o').cast(pl.Int32)).sum().mul(1/3).over(over_col).alias('IP'),
|
| 172 |
pl.col('pa_code').unique().len().over(over_col).alias('PA'),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
# pl.col('presult').is_in(verify_and_return_presult([
|
| 174 |
# 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 175 |
# 'Foul fly', 'Foul line (?)',
|
|
@@ -190,9 +222,9 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
|
|
| 190 |
|
| 191 |
# percentile ascending/descending
|
| 192 |
if pitching:
|
| 193 |
-
stat_descending_pctl = lambda stat: stat in ['BB%', 'FB%', 'LD%', 'Z-Swing%'] or 'Contact%' in stat
|
| 194 |
else:
|
| 195 |
-
stat_descending_pctl = lambda stat: not (stat in ['BB%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%'] or 'Contact%' in stat)
|
| 196 |
|
| 197 |
# col names
|
| 198 |
match player_type:
|
|
@@ -230,21 +262,27 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
|
|
| 230 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 231 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
| 232 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
pl.first('qualified')
|
| 249 |
)
|
| 250 |
.explode('batType')
|
|
@@ -259,7 +297,7 @@ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both',
|
|
| 259 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 260 |
.with_columns(
|
| 261 |
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 262 |
-
for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 263 |
)
|
| 264 |
.sort(qual_col, descending=True)
|
| 265 |
)
|
|
|
|
| 8 |
|
| 9 |
valid_pitch = pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)
|
| 10 |
|
| 11 |
+
# Shared polars aggregation expressions, each aliased to the leaderboard column
# it produces. They only reference column names, so the same objects can be
# passed to any aggregation whose frame carries these columns.

# Column handles and counts reused by several of the rates below.
_swung = pl.col('swing')
_missed = pl.col('whiff')
_in_zone = pl.col('zone')
_loc_x = pl.col('x')
_loc_y = pl.col('y')
_n_pitches = pl.col('pitch').sum()
_n_swings = _swung.sum()
_n_whiffs = _missed.sum()
_is_r = pl.col('pitLR') == 'r'

# --- Swing rates (denominator: all pitches) ---
swing = (_n_swings / _n_pitches).alias('Swing%')
# NOTE(review): Z-Swing% and Chase% are divided by ALL pitches, whereas
# Z-Contact%/O-Contact% below are divided by in-zone/out-of-zone swings only.
# Confirm whether in-zone-only / out-of-zone-only pitch counts were intended.
z_swing = ((_swung & _in_zone).sum() / _n_pitches).alias('Z-Swing%')
chase = ((_swung & ~_in_zone).sum() / _n_pitches).alias('Chase%')

# --- Contact rates (denominator: the matching set of swings) ---
contact = ((_swung & ~_missed).sum() / _n_swings).alias('Contact%')
_zone_swing = _in_zone & _swung
_out_swing = ~_in_zone & _swung
z_con = ((_zone_swing & ~_missed).sum() / _zone_swing.sum()).alias('Z-Contact%')
o_con = ((_out_swing & ~_missed).sum() / _out_swing.sum()).alias('O-Contact%')

# --- Whiff / strike / zone rates ---
whiff = (_n_whiffs / _n_swings).alias('Whiff%')
swstr = (_n_whiffs / _n_pitches).alias('SwStr%')
csw = (pl.col('csw').sum() / _n_pitches).alias('CSW%')
zone = (_in_zone.sum() / _n_pitches).alias('Zone%')

# --- Location shares (mean of a boolean test) ---
# Horizontal split: x < 0 counts toward Glove% when pitLR == 'r', x > 0 for
# everyone else; Arm% is the complementary test, so x == 0 always goes to Arm%.
glove = pl.when(_is_r).then(_loc_x < 0).otherwise(_loc_x > 0).mean().alias('Glove%')
arm = pl.when(_is_r).then(_loc_x >= 0).otherwise(_loc_x <= 0).mean().alias('Arm%')
# Vertical split at y == 125; the boundary value itself counts as low.
high = (_loc_y > 125).mean().alias('High%')
low = (_loc_y <= 125).mean().alias('Low%')
# Box test: x within [-20, 20] and y within [100, 150] (is_between bounds).
mm = (_loc_x.is_between(-20, 20) & _loc_y.is_between(100, 100 + 50)).mean().alias('MM%')


def _presult_count(*outcomes):
    # Count rows whose 'presult' is one of the given outcome names; the names
    # are passed through verify_and_return_presult before the membership test.
    return pl.col('presult').is_in(verify_and_return_presult(list(outcomes))).sum()


# --- On-base percentage and its components ---
# The denominator reads an existing 'AB' column (first value in the group), so
# 'AB' must already be present on the frame being aggregated.
_times_on_base = _presult_count(
    'Single', 'Double', 'Triple', 'Home run', 'Walk', 'Inside-the-park home run', 'Hit by pitch'
)
_obp_denominator = pl.col('AB').first() + _presult_count('Walk', 'Hit by pitch', 'Sacrifice fly')
obp = (_times_on_base / _obp_denominator).round(3).alias('OBP')
h = _presult_count('Single', 'Double', 'Triple', 'Home run', 'Inside-the-park home run').alias('H')
bb = _presult_count('Walk').alias('BB')
hbp = _presult_count('Hit by pitch').alias('HBP')
sf = _presult_count('Sacrifice fly').alias('SF')
|
| 34 |
+
|
| 35 |
|
| 36 |
def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
|
| 37 |
if start_date is not None:
|
|
|
|
| 129 |
pl.when(pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)).then('mph').mean().round(1).alias('Avg MPH'),
|
| 130 |
pl.col('mph').max().alias('Max MPH'),
|
| 131 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 132 |
+
swing,
|
| 133 |
+
z_swing,
|
| 134 |
+
chase,
|
| 135 |
+
contact,
|
| 136 |
+
z_con,
|
| 137 |
+
o_con,
|
| 138 |
+
whiff,
|
| 139 |
+
swstr,
|
| 140 |
+
csw,
|
| 141 |
+
zone,
|
| 142 |
+
glove,
|
| 143 |
+
arm,
|
| 144 |
+
high,
|
| 145 |
+
low,
|
| 146 |
+
mm,
|
| 147 |
)
|
| 148 |
.with_columns(
|
| 149 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
|
| 194 |
# pl.col('inning_code').unique().len().over(over_col).alias('IP'),
|
| 195 |
(pl.col('bso').struct.field('o').cast(pl.Int32) - pl.col('beforeBso').struct.field('o').cast(pl.Int32)).sum().mul(1/3).over(over_col).alias('IP'),
|
| 196 |
pl.col('pa_code').unique().len().over(over_col).alias('PA'),
|
| 197 |
+
pl.col('presult').is_in(verify_and_return_presult([
|
| 198 |
+
'Single', 'Double', 'Triple', 'Home run', 'Inside-the-park home run',
|
| 199 |
+
'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 200 |
+
'Foul fly', 'Foul line (?)',
|
| 201 |
+
'Error', 'Sacrifice hit error', 'Sacrifice fly error',
|
| 202 |
+
"Fielder's choice",
|
| 203 |
+
'Bunt strikeout', 'Swinging strikeout', 'Looking strikeout'
|
| 204 |
+
])).sum().over(over_col).alias('AB'),
|
| 205 |
# pl.col('presult').is_in(verify_and_return_presult([
|
| 206 |
# 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 207 |
# 'Foul fly', 'Foul line (?)',
|
|
|
|
| 222 |
|
| 223 |
# percentile ascending/descending
|
| 224 |
if pitching:
|
| 225 |
+
stat_descending_pctl = lambda stat: stat in ['BB%', 'FB%', 'LD%', 'Z-Swing%', 'OBP'] or 'Contact%' in stat
|
| 226 |
else:
|
| 227 |
+
stat_descending_pctl = lambda stat: not (stat in ['BB%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%', 'OBP'] or 'Contact%' in stat)
|
| 228 |
|
| 229 |
# col names
|
| 230 |
match player_type:
|
|
|
|
| 262 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 263 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
| 264 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 265 |
+
swing,
|
| 266 |
+
z_swing,
|
| 267 |
+
chase,
|
| 268 |
+
contact,
|
| 269 |
+
z_con,
|
| 270 |
+
o_con,
|
| 271 |
+
whiff,
|
| 272 |
+
swstr,
|
| 273 |
+
csw,
|
| 274 |
+
zone,
|
| 275 |
+
glove,
|
| 276 |
+
arm,
|
| 277 |
+
high,
|
| 278 |
+
low,
|
| 279 |
+
mm,
|
| 280 |
+
pl.col('AB').first(),
|
| 281 |
+
h,
|
| 282 |
+
bb,
|
| 283 |
+
hbp,
|
| 284 |
+
sf,
|
| 285 |
+
obp,
|
| 286 |
pl.first('qualified')
|
| 287 |
)
|
| 288 |
.explode('batType')
|
|
|
|
| 297 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 298 |
.with_columns(
|
| 299 |
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 300 |
+
for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'OBP']
|
| 301 |
)
|
| 302 |
.sort(qual_col, descending=True)
|
| 303 |
)
|