Spaces:
Running
Running
Commit
·
d0e7981
1
Parent(s):
9f7c1e1
Add team pitching leaderboard
Browse files- acknowledgements.md +1 -1
- app.py +6 -2
- data.py +1 -2
- pitcher_leaderboard.py +2 -2
- player_team_leaderboard.py +210 -0
- stats.py +64 -24
acknowledgements.md
CHANGED
|
@@ -6,4 +6,4 @@ The approach to visualizing pitch locations was inspired by [Stephen-Sutton Brow
|
|
| 6 |
|
| 7 |
The attempt to be more aesthetically-minded (ex. more conscious focus on font choice and legibility) was inspired by the hockey player cards of [JFresh](https://x.com/JFreshHockey).
|
| 8 |
|
| 9 |
-
Thanks to [ぼーのさん](https://bo-no05.hatenadiary.org/) for answering some of
|
|
|
|
| 6 |
|
| 7 |
The attempt to be more aesthetically-minded (ex. more conscious focus on font choice and legibility) was inspired by the hockey player cards of [JFresh](https://x.com/JFreshHockey).
|
| 8 |
|
| 9 |
+
Thanks to [ぼーのさん](https://bo-no05.hatenadiary.org/) for answering some of our questions while making this.
|
app.py
CHANGED
|
@@ -3,7 +3,8 @@ import matplotlib as mpl
|
|
| 3 |
|
| 4 |
from data import data_df
|
| 5 |
from pitcher_overview import create_pitcher_overview
|
| 6 |
-
from pitcher_leaderboard import create_pitcher_leaderboard
|
|
|
|
| 7 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 8 |
from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
|
| 9 |
from css import css
|
|
@@ -16,7 +17,7 @@ with open('updated.txt') as f:
|
|
| 16 |
|
| 17 |
limitations = '''**General Limitations**
|
| 18 |
- As new players make their debut, some names may be missing or not translated/transliterated correctly.
|
| 19 |
-
- IP is
|
| 20 |
'''
|
| 21 |
|
| 22 |
with open('acknowledgements.md', 'r') as f:
|
|
@@ -30,11 +31,14 @@ if __name__ == '__main__':
|
|
| 30 |
create_pitcher_leaderboard()
|
| 31 |
with gr.Tab('Pitch Leaderboard'):
|
| 32 |
create_pitch_leaderboard()
|
|
|
|
|
|
|
| 33 |
with gr.Tab('Daily/Weekly Leaderboard'):
|
| 34 |
create_daily_weekly_leaderboard_app(data_df)
|
| 35 |
with gr.Tab('Acknowledgements'):
|
| 36 |
gr.Markdown(acknowledgements)
|
| 37 |
|
|
|
|
| 38 |
gr.Markdown('---')
|
| 39 |
gr.Markdown(f'**Data up to:** {latest_data_date}')
|
| 40 |
gr.Markdown(f'**Last updated:** {updated}')
|
|
|
|
| 3 |
|
| 4 |
from data import data_df
|
| 5 |
from pitcher_overview import create_pitcher_overview
|
| 6 |
+
# from pitcher_leaderboard import create_pitcher_leaderboard
|
| 7 |
+
from player_team_leaderboard import create_pitcher_leaderboard, create_team_pitching_leaderboard
|
| 8 |
from pitch_leaderboard import create_pitch_leaderboard
|
| 9 |
from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
|
| 10 |
from css import css
|
|
|
|
| 17 |
|
| 18 |
limitations = '''**General Limitations**
|
| 19 |
- As new players make their debut, some names may be missing or not translated/transliterated correctly.
|
| 20 |
+
- IP is underestimated (does not count outs via caught stealing)
|
| 21 |
'''
|
| 22 |
|
| 23 |
with open('acknowledgements.md', 'r') as f:
|
|
|
|
| 31 |
create_pitcher_leaderboard()
|
| 32 |
with gr.Tab('Pitch Leaderboard'):
|
| 33 |
create_pitch_leaderboard()
|
| 34 |
+
with gr.Tab('Team Pitching Leaderboard'):
|
| 35 |
+
create_team_pitching_leaderboard()
|
| 36 |
with gr.Tab('Daily/Weekly Leaderboard'):
|
| 37 |
create_daily_weekly_leaderboard_app(data_df)
|
| 38 |
with gr.Tab('Acknowledgements'):
|
| 39 |
gr.Markdown(acknowledgements)
|
| 40 |
|
| 41 |
+
|
| 42 |
gr.Markdown('---')
|
| 43 |
gr.Markdown(f'**Data up to:** {latest_data_date}')
|
| 44 |
gr.Markdown(f'**Last updated:** {updated}')
|
data.py
CHANGED
|
@@ -148,7 +148,7 @@ data_df = (
|
|
| 148 |
)
|
| 149 |
.join(
|
| 150 |
(
|
| 151 |
-
aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches']]
|
| 152 |
.rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
|
| 153 |
),
|
| 154 |
on='universal_code',
|
|
@@ -214,7 +214,6 @@ data_df = (
|
|
| 214 |
.filter(pl.col('ballKind_code') != '-')
|
| 215 |
.unique()
|
| 216 |
)
|
| 217 |
-
bar = data_df
|
| 218 |
|
| 219 |
|
| 220 |
def select_name(names):
|
|
|
|
| 148 |
)
|
| 149 |
.join(
|
| 150 |
(
|
| 151 |
+
aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches', 'beforeBso', 'bso']]
|
| 152 |
.rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
|
| 153 |
),
|
| 154 |
on='universal_code',
|
|
|
|
| 214 |
.filter(pl.col('ballKind_code') != '-')
|
| 215 |
.unique()
|
| 216 |
)
|
|
|
|
| 217 |
|
| 218 |
|
| 219 |
def select_name(names):
|
pitcher_leaderboard.py
CHANGED
|
@@ -52,7 +52,7 @@ def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, qualified, pitch
|
|
| 52 |
data, player_type='pitcher',
|
| 53 |
pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
|
| 54 |
batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
|
| 55 |
-
|
| 56 |
group_by_team=True
|
| 57 |
)
|
| 58 |
.filter(pl.col('qualified'))
|
|
@@ -156,7 +156,7 @@ def create_pitcher_leaderboard():
|
|
| 156 |
# )
|
| 157 |
# pin_columns_state = gr.State(True)
|
| 158 |
pin_columns.click(
|
| 159 |
-
lambda : gr.
|
| 160 |
outputs=leaderboard
|
| 161 |
)
|
| 162 |
|
|
|
|
| 52 |
data, player_type='pitcher',
|
| 53 |
pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
|
| 54 |
batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
|
| 55 |
+
qual='qualified' if qualified else min_ip,
|
| 56 |
group_by_team=True
|
| 57 |
)
|
| 58 |
.filter(pl.col('qualified'))
|
|
|
|
| 156 |
# )
|
| 157 |
# pin_columns_state = gr.State(True)
|
| 158 |
pin_columns.click(
|
| 159 |
+
lambda : gr.Dataframe(pinned_columns=None),
|
| 160 |
outputs=leaderboard
|
| 161 |
)
|
| 162 |
|
player_team_leaderboard.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import polars as pl
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from functools import partial
|
| 7 |
+
|
| 8 |
+
from data import data_df
|
| 9 |
+
from stats import compute_player_stats, filter_data_by_date_and_game_kind
|
| 10 |
+
from convert import team_names_short_to_color, get_text_color_from_team
|
| 11 |
+
from plotting import stat_cmap
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Short team names offered as choices (and pre-selected) in the "Teams"
# checkbox group of the leaderboard app, and used to detect whether every team
# is currently selected.
TEAMS = [
    'Yomiuri',
    'Yakult',
    'DeNA',
    'Chunichi',
    'Hanshin',
    'Hiroshima',
    'Nipponham',
    'Rakuten',
    'Seibu',
    'Lotte',
    'ORIX',
    'SoftBank'
]


# Markdown rendered below the leaderboard table: known limitations and to-dos.
notes = '''**Limitations**
- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)

**To-do**
- Fix IP calculation
- Add post-season
'''
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_col_width(col, player_team_type):
    """Return the width to use for the leaderboard column named ``col``.

    Args:
        col: Column header as shown in the leaderboard table.
        player_team_type: Leaderboard flavour; only consulted for the ``'IP'``
            column, which is wider on the ``'team pitching'`` leaderboard.

    Returns:
        An integer width. Name, team, handedness and IP columns have fixed
        widths; every other column gets 10 units per header character, with
        a floor of 50.
    """
    if col in ('Pitcher', 'Batter'):
        return 125
    if col == 'Team':
        return 75
    if col in ('Throws', 'Bats'):
        return 60
    if col == 'IP':
        if player_team_type == 'team pitching':
            return 60
        return 50
    # Fallback: scale with the header length so long stat names are not cut.
    scaled = 10 * len(col)
    return scaled if scaled > 50 else 50
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def create_player_team_leaderboard_app(player_team_type):
    """Build the Gradio Blocks app for a player- or team-level leaderboard.

    Args:
        player_team_type: Leaderboard flavour. ``'pitcher'`` and
            ``'team pitching'`` select the pitching column set; any value
            containing ``'team'`` produces a per-team table (no player name or
            handedness columns, no qualification controls). The value is
            forwarded unchanged to ``compute_player_stats`` as ``player_type``.

    Returns:
        The ``gr.Blocks`` instance containing the filters, the search button
        and the leaderboard table.
    """
    pitching = player_team_type in ('pitcher', 'team pitching')
    team = 'team' in player_team_type

    # stats
    # The rate stats are shared by the pitching and batting tables, so they are
    # spelled out once and the per-flavour lists are derived from them.
    rate_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
    location_stats = ['Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
    # Fractions that are multiplied by 100 and displayed with a '%' suffix.
    pct_stats = rate_stats + ['Zone%'] + location_stats
    if pitching:
        # Columns whose cells are coloured by their `<stat>_pctl` value.
        stats_with_pctls = ['FB Velo'] + rate_stats + ['Zone%']
        cols = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF', 'FB Velo'] + pct_stats
        player_only_cols = ('Pitcher', 'Throws')
    else:
        stats_with_pctls = rate_stats + ['Zone%']
        cols = ['Batter', 'Team', 'Bats', 'PA'] + rate_stats
        player_only_cols = ('Batter', 'Bats')
    if team:
        cols = [col for col in cols if col not in player_only_cols]

    # col names
    player_type = 'pitcher' if pitching else 'batter'
    id_col = f'{player_type[:3].lower()}Id' if not team else f'{player_type}_team_name_short'
    qual_name = 'IP' if pitching else 'PA'

    def gr_create_player_team_leaderboard(start_date, end_date, min_qual, qualified, pitcher_lr='Both', batter_lr='Both', include_teams=None):
        """Compute the leaderboard and return it as a Gradio DataFrame value.

        Args:
            start_date: Start of the date range passed to
                ``filter_data_by_date_and_game_kind``.
            end_date: End of the date range passed to
                ``filter_data_by_date_and_game_kind``.
            min_qual: Numeric minimum IP/PA; used only when ``qualified`` is
                falsy.
            qualified: When truthy, ``'qualified'`` is passed as the
                qualification rule instead of ``min_qual``.
            pitcher_lr: ``'Both'``, ``'Left'`` or ``'Right'``.
            batter_lr: ``'Both'``, ``'Left'`` or ``'Right'``.
            include_teams: Optional collection of short team names to keep;
                ``None`` keeps every team.

        Returns:
            A dict with ``'data'``, ``'headers'`` and ``'metadata'`` (per-cell
            ``'styling'`` and ``'display_value'``) keys.
        """
        assert pitcher_lr in ['Both', 'Left', 'Right']
        assert batter_lr in ['Both', 'Left', 'Right']

        data = data_df.filter(pl.col('ballKind_code') != '-')

        data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')

        rename = {f'{player_type}_team_name_short': 'Team'}
        if not team:
            rename[f'{player_type}_name'] = player_type.title()
        if pitching:
            rename['PA'] = 'TBF'

        # typically "qualified" should be a valid input for min_qual for the current function,
        # but we separate it from a numerical min_qual argument for API compatibility
        stats = (
            compute_player_stats(
                data, player_type=player_team_type,
                pitcher_lr='both' if pitcher_lr == 'Both' else pitcher_lr[0].lower(),
                batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
                qual='qualified' if qualified else min_qual,
                group_by_team=not team
            )
            .filter(pl.col('qualified'))
            .drop(['qualified'] + ([id_col] if not team else []))
            .rename(rename)
            .with_columns(
                pl.col(stat).mul(100)
                for stat in pct_stats
            )
        )

        if include_teams is not None:
            stats = stats.filter(pl.col('Team').is_in(include_teams))

        # Slice the displayed columns once and reuse the rows for the data,
        # the styling and the display values.
        rows = stats[cols].rows()

        styling = []
        for i, row in enumerate(rows):
            styling_row = []
            for col, item in zip(cols, row):
                if col in stats_with_pctls:
                    # Colour the cell from the stat's percentile via the shared colormap.
                    r, g, b = (stat_cmap([stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
                    styling_row.append(f'background-color: rgba({r}, {g}, {b})')
                elif col == 'Team':
                    styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
                else:
                    styling_row.append('')
            styling.append(styling_row)

        display_value = []
        for row in rows:
            display_value_row = []
            for col, item in zip(cols, row):
                if col in pct_stats:
                    display_value_row.append(f'{item:.1f}%')
                elif isinstance(item, float):
                    display_value_row.append(f'{item:.1f}')
                else:
                    display_value_row.append(item)
            display_value.append(display_value_row)

        value = {
            'data': rows,
            'headers': cols,
            'metadata': {
                'styling': styling,
                'display_value': display_value,
            }
        }

        return value

    now = datetime.now()
    start_datetime_init = datetime(now.year, 1, 1)
    end_datetime_init = now
    with gr.Blocks() as app:
        gr.Markdown(f'# {player_team_type.title()} Leaderboard')
        with gr.Row():
            start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
            end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
        with gr.Row():
            if not team:
                with gr.Group():
                    min_ip = gr.Number(100, label=f'Min. {qual_name}', precision=0, minimum=0, interactive=False)
                    qualified = gr.Checkbox(True, label='Qualified')
            else:
                # Team tables have no qualification controls: fixed hidden
                # values (minimum 0, not "qualified") are passed instead.
                min_ip = gr.State(0)
                qualified = gr.State(False)
            with gr.Group():
                pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Pitcher handedness')
                batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
        with gr.Row():
            include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
            all_teams = gr.Button('Select/Deselect all teams')

        search = gr.Button('Search')
        pin_columns = gr.Checkbox(True, label='Pin columns')
        leaderboard = gr.DataFrame(
            # Empty placeholder with the same headers as the real table so it
            # lines up with `column_widths` before the first search.
            pl.DataFrame({col: [] for col in cols}),
            column_widths=[get_col_width(col, player_team_type) for col in cols],
            show_copy_button=True,
            show_search='filter',
            pinned_columns=2,
            elem_id='leaderboard'
        )

        gr.Markdown(notes)

        search.click(gr_create_player_team_leaderboard, inputs=[start_date, end_date, min_ip, qualified, pitcher_lr, batter_lr, include_teams], outputs=leaderboard)
        # Compare as sets: the checkbox group's value may not be in TEAMS
        # order (presumably it follows the order the boxes were ticked), and
        # "all selected" should toggle to "none" regardless of order.
        all_teams.click(lambda _teams: [] if set(_teams or []) == set(TEAMS) else TEAMS, inputs=include_teams, outputs=include_teams)
        qualified.change(lambda qualified: gr.Number(interactive=not qualified), inputs=qualified, outputs=min_ip)
        pin_columns.input(
            lambda pin: gr.DataFrame(pinned_columns=2 if pin else None),
            inputs=pin_columns,
            outputs=leaderboard
        )

    return app
|
| 197 |
+
|
| 198 |
+
# Zero-argument factory for the per-pitcher leaderboard app.
create_pitcher_leaderboard = partial(
    create_player_team_leaderboard_app,
    player_team_type='pitcher'
)

# Zero-argument factory for the per-team pitching leaderboard app.
create_team_pitching_leaderboard = partial(
    create_player_team_leaderboard_app,
    player_team_type='team pitching'
)

if __name__ == '__main__':
    # Running this module directly serves the pitcher leaderboard on its own.
    app = create_pitcher_leaderboard()
    app.launch()
|
stats.py
CHANGED
|
@@ -75,6 +75,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
|
|
| 75 |
assert player_type in ('pitcher', 'batter')
|
| 76 |
assert pitch_class_type in ('general', 'specific')
|
| 77 |
|
|
|
|
|
|
|
| 78 |
if pitcher_lr != 'both':
|
| 79 |
data = data.filter(pl.col('pitLR') == pitcher_lr)
|
| 80 |
|
|
@@ -82,8 +84,9 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
|
|
| 82 |
data = data.filter(pl.col('batLR') == batter_lr)
|
| 83 |
|
| 84 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
|
|
|
| 85 |
if group_by_team:
|
| 86 |
-
id_cols.append(
|
| 87 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 88 |
pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 89 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
|
@@ -141,15 +144,32 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
|
|
| 141 |
)
|
| 142 |
return pitch_stats
|
| 143 |
|
| 144 |
-
def compute_player_stats(data, player_type,
|
| 145 |
assert pitcher_lr in ('both', 'l', 'r')
|
| 146 |
assert batter_lr in ('both', 'l', 'r')
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
data = (
|
| 149 |
compute_team_games(data)
|
| 150 |
.with_columns(
|
| 151 |
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 152 |
-
pl.col('inning_code').unique().len().over(
|
|
|
|
|
|
|
| 153 |
# pl.col('presult').is_in(verify_and_return_presult([
|
| 154 |
# 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 155 |
# 'Foul fly', 'Foul line (?)',
|
|
@@ -160,32 +180,52 @@ def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both
|
|
| 160 |
)
|
| 161 |
)
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
| 165 |
else:
|
| 166 |
-
data = data.with_columns((pl.col(
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
player_stats = (
|
| 180 |
data
|
| 181 |
.with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
|
| 182 |
.group_by(id_cols)
|
| 183 |
.agg(
|
| 184 |
-
pl.col(name_col).first(),
|
| 185 |
-
*([] if group_by_team else [pl.col(
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
pl.col('IP').first(),
|
| 188 |
-
pl.col('
|
| 189 |
pl.col('FB Velo').max(),
|
| 190 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 191 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
|
@@ -218,10 +258,10 @@ def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both
|
|
| 218 |
)
|
| 219 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 220 |
.with_columns(
|
| 221 |
-
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat
|
| 222 |
for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 223 |
)
|
| 224 |
-
.sort(
|
| 225 |
)
|
| 226 |
return player_stats
|
| 227 |
|
|
@@ -248,5 +288,5 @@ def get_pitcher_stats(id, lr='both', game_kind=None, start_date=None, end_date=N
|
|
| 248 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 249 |
)
|
| 250 |
|
| 251 |
-
pitcher_stats = compute_player_stats(source_data, player_type='pitcher',
|
| 252 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|
|
|
|
| 75 |
assert player_type in ('pitcher', 'batter')
|
| 76 |
assert pitch_class_type in ('general', 'specific')
|
| 77 |
|
| 78 |
+
pitching = player_type in ('pitcher', )
|
| 79 |
+
|
| 80 |
if pitcher_lr != 'both':
|
| 81 |
data = data.filter(pl.col('pitLR') == pitcher_lr)
|
| 82 |
|
|
|
|
| 84 |
data = data.filter(pl.col('batLR') == batter_lr)
|
| 85 |
|
| 86 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 87 |
+
team_col = 'pitcher_team_name_short' if pitching else 'batter_team_name_short'
|
| 88 |
if group_by_team:
|
| 89 |
+
id_cols.append(team_col)
|
| 90 |
name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
|
| 91 |
pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 92 |
pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
|
|
|
| 144 |
)
|
| 145 |
return pitch_stats
|
| 146 |
|
| 147 |
+
def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both', batter_lr='both', group_by_team=False):
|
| 148 |
assert pitcher_lr in ('both', 'l', 'r')
|
| 149 |
assert batter_lr in ('both', 'l', 'r')
|
| 150 |
+
assert player_type in ('pitcher', 'batter', 'team pitching', 'team batting')
|
| 151 |
+
|
| 152 |
+
# pitching or batting, player or team
|
| 153 |
+
pitching = player_type in ('pitcher', 'team pitching')
|
| 154 |
+
team = player_type in ('team pitching', 'team batting')
|
| 155 |
+
|
| 156 |
+
# handedness filters
|
| 157 |
+
if pitcher_lr != 'both':
|
| 158 |
+
data = data.filter(pl.col('pitLR') == pitcher_lr)
|
| 159 |
+
if batter_lr != 'both':
|
| 160 |
+
data = data.filter(pl.col('batLR') == batter_lr)
|
| 161 |
+
|
| 162 |
+
if pitching:
|
| 163 |
+
over_col = 'pitId' if not team else 'pitcher_team_name_short'
|
| 164 |
+
else:
|
| 165 |
+
over_col = 'batId' if not team else 'batter_team_name_short'
|
| 166 |
data = (
|
| 167 |
compute_team_games(data)
|
| 168 |
.with_columns(
|
| 169 |
pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
|
| 170 |
+
# pl.col('inning_code').unique().len().over(over_col).alias('IP'),
|
| 171 |
+
(pl.col('bso').struct.field('o').cast(pl.Int32) - pl.col('beforeBso').struct.field('o').cast(pl.Int32)).sum().mul(1/3).over(over_col).alias('IP'),
|
| 172 |
+
pl.col('pa_code').unique().len().over(over_col).alias('PA'),
|
| 173 |
# pl.col('presult').is_in(verify_and_return_presult([
|
| 174 |
# 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
|
| 175 |
# 'Foul fly', 'Foul line (?)',
|
|
|
|
| 180 |
)
|
| 181 |
)
|
| 182 |
|
| 183 |
+
# qualifiers
|
| 184 |
+
qualified_factor = 1 if pitching else 3.1
|
| 185 |
+
qual_col = 'IP' if pitching else 'PA'
|
| 186 |
+
if qual == 'qualified':
|
| 187 |
+
data = data.with_columns((pl.col(qual_col) >= qualified_factor * pl.col('games')).alias('qualified'))
|
| 188 |
else:
|
| 189 |
+
data = data.with_columns((pl.col(qual_col) >= qual).alias('qualified'))
|
| 190 |
|
| 191 |
+
# percentile ascending/descending
|
| 192 |
+
if pitching:
|
| 193 |
+
stat_descending_pctl = lambda stat: stat in ['BB%', 'FB%', 'LD%', 'Z-Swing%'] or 'Contact%' in stat
|
| 194 |
+
else:
|
| 195 |
+
stat_descending_pctl = lambda stat: not (stat in ['BB%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%'] or 'Contact%' in stat)
|
| 196 |
|
| 197 |
+
# col names
|
| 198 |
+
match player_type:
|
| 199 |
+
case 'pitcher':
|
| 200 |
+
id_cols = ['pitId']
|
| 201 |
+
name_col = 'pitcher_name'
|
| 202 |
+
case 'batter':
|
| 203 |
+
id_cols = ['batId']
|
| 204 |
+
name_col = 'batter_name'
|
| 205 |
+
case _:
|
| 206 |
+
id_cols = []
|
| 207 |
+
name_col = None
|
| 208 |
+
|
| 209 |
+
team_col = 'pitcher_team_name_short' if pitching else 'batter_team_name_short'
|
| 210 |
+
if group_by_team or team:
|
| 211 |
+
id_cols.append(team_col)
|
| 212 |
+
|
| 213 |
+
handedness_col = 'pitLR' if pitching else 'batLR'
|
| 214 |
+
new_handedness_col = 'Throws' if pitching else 'Bats'
|
| 215 |
player_stats = (
|
| 216 |
data
|
| 217 |
.with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
|
| 218 |
.group_by(id_cols)
|
| 219 |
.agg(
|
| 220 |
+
*([pl.col(name_col).first()] if not team else []),
|
| 221 |
+
*([] if group_by_team or team else [pl.col(team_col).last()]),
|
| 222 |
+
*(
|
| 223 |
+
[pl.col(handedness_col).first().str.to_uppercase().alias(new_handedness_col) ]
|
| 224 |
+
if not (team and ((pitcher_lr == 'both') if pitching else (batter_lr == 'both')))
|
| 225 |
+
else []
|
| 226 |
+
),
|
| 227 |
pl.col('IP').first(),
|
| 228 |
+
pl.col('PA').first(),
|
| 229 |
pl.col('FB Velo').max(),
|
| 230 |
(pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
|
| 231 |
(pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
|
|
|
|
| 258 |
)
|
| 259 |
.drop('G', 'F', 'B', 'P', 'L')
|
| 260 |
.with_columns(
|
| 261 |
+
(pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 262 |
for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 263 |
)
|
| 264 |
+
.sort(qual_col, descending=True)
|
| 265 |
)
|
| 266 |
return player_stats
|
| 267 |
|
|
|
|
| 288 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 289 |
)
|
| 290 |
|
| 291 |
+
pitcher_stats = compute_player_stats(source_data, player_type='pitcher', qual=min_ip, batter_lr=lr, group_by_team=False).filter(pl.col('pitId') == id)
|
| 292 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|