# npb_data_app / pitcher_leaderboard.py
# Commit d0e7981 by patrickramos: Add team pitching leaderboard
import gradio as gr
import polars as pl
import numpy as np
from datetime import datetime
# from itertools import chain
from data import data_df
from stats import compute_player_stats, filter_data_by_date_and_game_kind
from convert import team_names_short_to_color, get_text_color_from_team
from plotting import stat_cmap
# Rate stats: the leaderboard multiplies these by 100 and renders them with a
# trailing '%'.
PCT_STATS = [
    'K%', 'BB%',
    'Swing%', 'Z-Swing%', 'Chase%',
    'Contact%', 'Z-Contact%', 'O-Contact%',
    'SwStr%', 'Whiff%', 'CSW%',
    'GB%', 'FB%', 'LD%',
    'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%',
]

# Every stat column shown on the leaderboard, in display order.
STATS = ['FB Velo', *PCT_STATS]

# Stats that get a percentile-based background colour: everything except the
# last five entries ('Arm%', 'Glove%', 'High%', 'Low%', 'MM%').
STATS_WITH_PCTLS = STATS[:-5]

# Full header row: identifying columns followed by the stat columns.
COLUMNS = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF', *STATS]
# Short team names offered in the team filter, in display order.
TEAMS = (
    'Yomiuri Yakult DeNA Chunichi Hanshin Hiroshima '
    'Nipponham Rakuten Seibu Lotte ORIX SoftBank'
).split()
# Markdown footer rendered below the leaderboard table.
notes = '\n'.join([
    '**Limitations**',
    '- [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)',
    '**To-do**',
    '- Fix IP calculation',
    '- Add post-season',
    '',  # keeps the trailing newline of the original literal
])
def _leaderboard_cell_style(col, item, pctl):
    """Return the CSS string for one leaderboard cell ('' when unstyled)."""
    if col in STATS_WITH_PCTLS:
        if pctl is None:
            # No percentile, nothing to map to a colour.
            # NOTE(review): stat_cmap is defined elsewhere; confirm it has no
            # dedicated handling for None that should be used here instead.
            return ''
        r, g, b = (stat_cmap([pctl])[0, :3] * 255).astype(np.uint8)
        return f'background-color: rgba({r}, {g}, {b})'
    if col == 'Team':
        return f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}'
    return ''


def _leaderboard_cell_text(col, item):
    """Return the display value for one leaderboard cell."""
    if item is None:
        # A format spec such as ':.1f' raises TypeError on None, so missing
        # values are passed through untouched like other non-float cells.
        return item
    if col in PCT_STATS:
        return f'{item:.1f}%'
    if isinstance(item, float):
        return f'{item:.1f}'
    return item


def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, qualified, pitcher_lr='Both', batter_lr='Both', include_teams=None):
    """Compute the pitcher leaderboard and package it as a ``gr.DataFrame`` value.

    Args:
        start_date, end_date: Date bounds forwarded to
            ``filter_data_by_date_and_game_kind``.
        min_ip: Numeric threshold forwarded as ``qual`` when ``qualified`` is
            falsy.
        qualified: When truthy, the string ``'qualified'`` is forwarded as
            ``qual`` instead of ``min_ip``.
        pitcher_lr: ``'Both'``, ``'Left'`` or ``'Right'`` (asserted).
        batter_lr: Same choices expected (not validated here). ``'Both'`` maps
            to ``'both'``; otherwise the lower-cased first letter is forwarded.
        include_teams: Optional collection of short team names to keep;
            ``None`` keeps every team.

    Returns:
        A dict with ``'data'`` (list of row tuples), ``'headers'`` (``COLUMNS``)
        and ``'metadata'`` holding per-cell ``'styling'`` and
        ``'display_value'`` lists, each with the same shape as ``'data'``.
    """
    assert pitcher_lr in ['Both', 'Left', 'Right']

    # Rows whose ballKind_code is '-' are excluded before any stat is computed.
    data = data_df.filter(pl.col('ballKind_code') != '-')
    data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')

    # Typically "qualified" should be a valid input for min_ip for the current
    # function, but we separate it from a numerical min_ip argument for API
    # compatibility.
    pitcher_stats = (
        compute_player_stats(
            data, player_type='pitcher',
            pitcher_lr='both' if pitcher_lr == 'Both' else pitcher_lr[0].lower(),
            batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
            qual='qualified' if qualified else min_ip,
            group_by_team=True
        )
        .filter(pl.col('qualified'))
        .drop('pitId', 'qualified')
        .rename({
            'pitcher_name': 'Pitcher',
            'pitcher_team_name_short': 'Team',
            'PA': 'TBF'
        })
        # Scale the rate stats by 100 so they read as percentages.
        .with_columns(
            pl.col(stat).mul(100)
            for stat in PCT_STATS
        )
    )
    if include_teams is not None:
        pitcher_stats = pitcher_stats.filter(pl.col('Team').is_in(include_teams))

    # Materialise the displayed table once; it feeds 'data', the styling grid
    # and the display-value grid.
    rows = pitcher_stats[COLUMNS].rows()

    # Pull each '<stat>_pctl' column out once instead of re-indexing the frame
    # for every cell.
    pctls = {
        col: pitcher_stats[f'{col}_pctl'].to_list()
        for col in COLUMNS
        if col in STATS_WITH_PCTLS
    }

    styling = [
        [
            _leaderboard_cell_style(col, item, pctls[col][i] if col in pctls else None)
            for col, item in zip(COLUMNS, row)
        ]
        for i, row in enumerate(rows)
    ]
    display_value = [
        [_leaderboard_cell_text(col, item) for col, item in zip(COLUMNS, row)]
        for row in rows
    ]

    return {
        'data': rows,
        'headers': COLUMNS,
        'metadata': {
            'styling': styling,
            'display_value': display_value,
        }
    }
def create_pitcher_leaderboard():
    """Assemble the pitcher leaderboard page and return the ``gr.Blocks`` app.

    The date pickers default to January 1st of the current year through now.
    """
    today = datetime.now()
    season_start = datetime(today.year, 1, 1)

    def _toggle_all_teams(selected):
        # Clears the selection only when it equals TEAMS exactly (same
        # order); any other selection is replaced by the full list.
        if selected == TEAMS:
            return []
        return TEAMS

    def _sync_min_ip(is_qualified):
        # The numeric threshold is only editable while "Qualified" is unticked.
        return gr.Number(interactive=not is_qualified)

    def _clear_pinned_columns():
        return gr.Dataframe(pinned_columns=None)

    # Five fixed-width identifying columns followed by one width per stat,
    # scaled with the header length.
    widths = [125, 75, 60, 50, 50] + [max(50, 10*len(stat)) for stat in STATS]

    with gr.Blocks() as app:
        gr.Markdown('# Pitcher Leaderboard')

        with gr.Row():
            start_date = gr.DateTime(season_start, include_time=False, type='datetime', label='Start')
            end_date = gr.DateTime(today, include_time=False, type='datetime', label='End')

        with gr.Row():
            with gr.Group():
                min_ip = gr.Number(100, label='Min. IP', precision=0, minimum=0, interactive=False)
                qualified = gr.Checkbox(True, label='Qualified')
            with gr.Group():
                pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Pitcher handedness')
                batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')

        with gr.Row():
            include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
            all_teams = gr.Button('Select/Deselect all teams')

        search = gr.Button('Search')
        pin_columns = gr.Button('Pin columns')

        leaderboard = gr.DataFrame(
            pl.DataFrame({'Pitcher': [], 'Pitch': []}),
            column_widths=widths,
            show_copy_button=True,
            show_search='filter',
            pinned_columns=2,
            elem_id='leaderboard'
        )
        gr.Markdown(notes)

        # Event wiring.
        search.click(
            gr_create_pitcher_leaderboard,
            inputs=[start_date, end_date, min_ip, qualified, pitcher_lr, batter_lr, include_teams],
            outputs=leaderboard
        )
        all_teams.click(_toggle_all_teams, inputs=include_teams, outputs=include_teams)
        qualified.change(_sync_min_ip, inputs=qualified, outputs=min_ip)
        pin_columns.click(_clear_pinned_columns, outputs=leaderboard)

    return app
if __name__ == '__main__':
    # Script entry point: build the Blocks app, then start the Gradio server.
    app = create_pitcher_leaderboard()
    app.launch()