Spaces:
Running
Running
Commit
·
ac9071c
1
Parent(s):
31b84c9
Add pitcher handedness filtering
Browse files
- README.md +1 -1
- pitch_leaderboard.py +17 -9
- pitcher_leaderboard.py +15 -9
- stats.py +29 -18
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: ⚾️
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
python_version: 3.13.5
|
|
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.44.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
python_version: 3.13.5
|
pitch_leaderboard.py
CHANGED
|
@@ -13,7 +13,7 @@ from plotting import stat_cmap
|
|
| 13 |
STATS = ['Count', 'Usage', 'Avg Velo', 'Max Velo', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 14 |
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 15 |
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 16 |
-
COLUMNS = ['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS
|
| 17 |
|
| 18 |
PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
| 19 |
TEAMS = [
|
|
@@ -38,17 +38,23 @@ notes = '''**Limitations**
|
|
| 38 |
'''
|
| 39 |
|
| 40 |
|
| 41 |
-
def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='Both', include_pitches=PITCH_TYPES, include_teams=None):
|
| 42 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
| 43 |
|
| 44 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 45 |
|
| 46 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
| 47 |
-
if pitcher_lr != 'Both':
|
| 48 |
-
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 49 |
|
| 50 |
pitch_stats = (
|
| 51 |
-
compute_pitch_stats(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 53 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 54 |
.rename({
|
|
@@ -123,10 +129,12 @@ def create_pitch_leaderboard():
|
|
| 123 |
start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
|
| 124 |
end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
|
| 125 |
with gr.Row():
|
| 126 |
-
|
| 127 |
-
|
| 128 |
all_pitches = gr.Button('Select/Deselect all pitches')
|
|
|
|
| 129 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
|
|
|
| 130 |
batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
| 131 |
with gr.Row():
|
| 132 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
|
@@ -136,7 +144,7 @@ def create_pitch_leaderboard():
|
|
| 136 |
pin_columns = gr.Button('Pin columns')
|
| 137 |
leaderboard = gr.DataFrame(
|
| 138 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 139 |
-
column_widths=[125, 75, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
|
| 140 |
show_copy_button=True,
|
| 141 |
show_search='filter',
|
| 142 |
pinned_columns=3,
|
|
@@ -145,7 +153,7 @@ def create_pitch_leaderboard():
|
|
| 145 |
|
| 146 |
gr.Markdown(notes)
|
| 147 |
|
| 148 |
-
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, batter_lr, include_pitches, include_teams], outputs=leaderboard)
|
| 149 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 150 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 151 |
# pin_columns.input(
|
|
|
|
| 13 |
STATS = ['Count', 'Usage', 'Avg Velo', 'Max Velo', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 14 |
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 15 |
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 16 |
+
COLUMNS = ['Pitcher', 'Team', 'Throws', 'Pitch', 'Pitch (General)'] + STATS
|
| 17 |
|
| 18 |
PITCH_TYPES = [pitch_type for pitch_type in ball_kind.values() if pitch_type != '-']
|
| 19 |
TEAMS = [
|
|
|
|
| 38 |
'''
|
| 39 |
|
| 40 |
|
| 41 |
+
def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='Both', batter_lr='Both', include_pitches=PITCH_TYPES, include_teams=None):
|
| 42 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
| 43 |
|
| 44 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 45 |
|
| 46 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
|
|
|
|
|
|
| 47 |
|
| 48 |
pitch_stats = (
|
| 49 |
+
compute_pitch_stats(
|
| 50 |
+
data,
|
| 51 |
+
player_type='pitcher',
|
| 52 |
+
min_pitches=min_pitches,
|
| 53 |
+
pitch_class_type='specific',
|
| 54 |
+
pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
|
| 55 |
+
batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
|
| 56 |
+
group_by_team=True
|
| 57 |
+
)
|
| 58 |
.filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
|
| 59 |
.drop('pitId', 'ballKind_code', 'qualified')
|
| 60 |
.rename({
|
|
|
|
| 129 |
start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
|
| 130 |
end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
|
| 131 |
with gr.Row():
|
| 132 |
+
with gr.Column(scale=3):
|
| 133 |
+
include_pitches = gr.CheckboxGroup(PITCH_TYPES, value=PITCH_TYPES, label='Pitches')
|
| 134 |
all_pitches = gr.Button('Select/Deselect all pitches')
|
| 135 |
+
with gr.Column(scale=1):
|
| 136 |
min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
|
| 137 |
+
pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Pitcher handedness')
|
| 138 |
batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
| 139 |
with gr.Row():
|
| 140 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
|
|
|
| 144 |
pin_columns = gr.Button('Pin columns')
|
| 145 |
leaderboard = gr.DataFrame(
|
| 146 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 147 |
+
column_widths=[125, 75, 60, 125, 125] + [max(50, 10*len(stat)) for stat in STATS],
|
| 148 |
show_copy_button=True,
|
| 149 |
show_search='filter',
|
| 150 |
pinned_columns=3,
|
|
|
|
| 153 |
|
| 154 |
gr.Markdown(notes)
|
| 155 |
|
| 156 |
+
search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, pitcher_lr, batter_lr, include_pitches, include_teams], outputs=leaderboard)
|
| 157 |
all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
|
| 158 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 159 |
# pin_columns.input(
|
pitcher_leaderboard.py
CHANGED
|
@@ -13,7 +13,7 @@ from plotting import stat_cmap
|
|
| 13 |
STATS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 14 |
PCT_STATS = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 15 |
STATS_WITH_PCTLS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 16 |
-
COLUMNS = ['Pitcher', 'Team', 'IP', 'TBF'] + STATS
|
| 17 |
|
| 18 |
TEAMS = [
|
| 19 |
'Yomiuri',
|
|
@@ -38,17 +38,21 @@ notes = '''**Limitations**
|
|
| 38 |
'''
|
| 39 |
|
| 40 |
|
| 41 |
-
def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, pitcher_lr='Both', include_teams=None):
|
| 42 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
| 43 |
|
| 44 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 45 |
|
| 46 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
| 47 |
-
if pitcher_lr != 'Both':
|
| 48 |
-
data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
|
| 49 |
|
| 50 |
pitcher_stats = (
|
| 51 |
-
compute_player_stats(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
.filter(pl.col('qualified'))
|
| 53 |
.drop('pitId', 'qualified')
|
| 54 |
.rename({
|
|
@@ -117,7 +121,9 @@ def create_pitcher_leaderboard():
|
|
| 117 |
min_ip_state = gr.State('qualified')
|
| 118 |
min_ip = gr.Number(100, label='Min. IP', precision=0, minimum=0, interactive=False)
|
| 119 |
qualified = gr.Checkbox(True, label='Qualified')
|
| 120 |
-
|
|
|
|
|
|
|
| 121 |
with gr.Row():
|
| 122 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
| 123 |
all_teams = gr.Button('Select/Deselect all teams')
|
|
@@ -126,7 +132,7 @@ def create_pitcher_leaderboard():
|
|
| 126 |
pin_columns = gr.Button('Pin columns')
|
| 127 |
leaderboard = gr.DataFrame(
|
| 128 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 129 |
-
column_widths=[125, 75, 50, 50] + [max(50, 10*len(stat)) for stat in STATS],
|
| 130 |
show_copy_button=True,
|
| 131 |
show_search='filter',
|
| 132 |
pinned_columns=2,
|
|
@@ -135,9 +141,9 @@ def create_pitcher_leaderboard():
|
|
| 135 |
|
| 136 |
gr.Markdown(notes)
|
| 137 |
|
| 138 |
-
search.click(gr_create_pitcher_leaderboard, inputs=[start_date, end_date, min_ip_state, batter_lr, include_teams], outputs=leaderboard)
|
| 139 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 140 |
-
min_ip_state_kwargs = dict(fn=lambda min_ip, qualified: (qualified if qualified else min_ip, gr.Number(interactive=not qualified)), inputs=[min_ip, qualified], outputs=[min_ip_state, min_ip])
|
| 141 |
min_ip.change(**min_ip_state_kwargs)
|
| 142 |
qualified.change(**min_ip_state_kwargs)
|
| 143 |
# pin_columns.input(
|
|
|
|
| 13 |
STATS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 14 |
PCT_STATS = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
|
| 15 |
STATS_WITH_PCTLS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 16 |
+
COLUMNS = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF'] + STATS
|
| 17 |
|
| 18 |
TEAMS = [
|
| 19 |
'Yomiuri',
|
|
|
|
| 38 |
'''
|
| 39 |
|
| 40 |
|
| 41 |
+
def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, pitcher_lr='Both', batter_lr='Both', include_teams=None):
|
| 42 |
assert pitcher_lr in ['Both', 'Left', 'Right']
|
| 43 |
|
| 44 |
data = data_df.filter(pl.col('ballKind_code') != '-')
|
| 45 |
|
| 46 |
data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
|
|
|
|
|
|
|
| 47 |
|
| 48 |
pitcher_stats = (
|
| 49 |
+
compute_player_stats(
|
| 50 |
+
data, player_type='pitcher',
|
| 51 |
+
pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
|
| 52 |
+
batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
|
| 53 |
+
min_ip=min_ip,
|
| 54 |
+
group_by_team=True
|
| 55 |
+
)
|
| 56 |
.filter(pl.col('qualified'))
|
| 57 |
.drop('pitId', 'qualified')
|
| 58 |
.rename({
|
|
|
|
| 121 |
min_ip_state = gr.State('qualified')
|
| 122 |
min_ip = gr.Number(100, label='Min. IP', precision=0, minimum=0, interactive=False)
|
| 123 |
qualified = gr.Checkbox(True, label='Qualified')
|
| 124 |
+
with gr.Group():
|
| 125 |
+
pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Pitcher handedness')
|
| 126 |
+
batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
|
| 127 |
with gr.Row():
|
| 128 |
include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
|
| 129 |
all_teams = gr.Button('Select/Deselect all teams')
|
|
|
|
| 132 |
pin_columns = gr.Button('Pin columns')
|
| 133 |
leaderboard = gr.DataFrame(
|
| 134 |
pl.DataFrame({'Pitcher': [], 'Pitch': []}),
|
| 135 |
+
column_widths=[125, 75, 60, 50, 50] + [max(50, 10*len(stat)) for stat in STATS],
|
| 136 |
show_copy_button=True,
|
| 137 |
show_search='filter',
|
| 138 |
pinned_columns=2,
|
|
|
|
| 141 |
|
| 142 |
gr.Markdown(notes)
|
| 143 |
|
| 144 |
+
search.click(gr_create_pitcher_leaderboard, inputs=[start_date, end_date, min_ip_state, pitcher_lr, batter_lr, include_teams], outputs=leaderboard)
|
| 145 |
all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
|
| 146 |
+
min_ip_state_kwargs = dict(fn=lambda min_ip, qualified: ('qualified' if qualified else min_ip, gr.Number(interactive=not qualified)), inputs=[min_ip, qualified], outputs=[min_ip_state, min_ip])
|
| 147 |
min_ip.change(**min_ip_state_kwargs)
|
| 148 |
qualified.change(**min_ip_state_kwargs)
|
| 149 |
# pin_columns.input(
|
stats.py
CHANGED
|
@@ -69,9 +69,18 @@ def compute_team_games(data):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
-
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, group_by_team=False):
|
|
|
|
|
|
|
| 73 |
assert player_type in ('pitcher', 'batter')
|
| 74 |
assert pitch_class_type in ('general', 'specific')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 76 |
if group_by_team:
|
| 77 |
id_cols.append('pitcher_team_name_short')
|
|
@@ -84,6 +93,7 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, grou
|
|
| 84 |
.group_by(*id_cols, pitch_col)
|
| 85 |
.agg(
|
| 86 |
pl.first(name_col),
|
|
|
|
| 87 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
| 88 |
pl.first(pitch_name_col),
|
| 89 |
pl.len().alias('count'),
|
|
@@ -131,7 +141,10 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, grou
|
|
| 131 |
)
|
| 132 |
return pitch_stats
|
| 133 |
|
| 134 |
-
def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=False):
|
|
|
|
|
|
|
|
|
|
| 135 |
data = (
|
| 136 |
compute_team_games(data)
|
| 137 |
.with_columns(
|
|
@@ -152,6 +165,12 @@ def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=Fa
|
|
| 152 |
else:
|
| 153 |
data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
assert player_type in ('pitcher', 'batter')
|
| 156 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 157 |
if group_by_team:
|
|
@@ -164,6 +183,7 @@ def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=Fa
|
|
| 164 |
.agg(
|
| 165 |
pl.col(name_col).first(),
|
| 166 |
*([] if group_by_team else [pl.col('pitcher_team_name_short').last()]),
|
|
|
|
| 167 |
pl.col('IP').first(),
|
| 168 |
pl.col('pa_code').unique().len().alias('PA'),
|
| 169 |
pl.col('FB Velo').max(),
|
|
@@ -206,26 +226,18 @@ def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=Fa
|
|
| 206 |
return player_stats
|
| 207 |
|
| 208 |
|
| 209 |
-
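def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):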
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
# if start_date is not None:
|
| 213 |
-
# source_data = source_data.filter(pl.col('date') >= start_date)
|
| 214 |
-
# if end_date is not None:
|
| 215 |
-
# source_data = source_data.filter(pl.col('date') <= end_date)
|
| 216 |
-
#
|
| 217 |
-
# if game_kind is not None:
|
| 218 |
-
# source_data = source_data.filter(pl.col('coarse_game_kind') == game_kind)
|
| 219 |
source_data = data_df
|
| 220 |
source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
|
| 221 |
|
| 222 |
-
if lr is not None:
|
| 223 |
-
source_data =
|
| 224 |
|
| 225 |
-
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches, group_by_team=False).filter(pl.col('pitId') == id)
|
| 226 |
|
| 227 |
pitch_shapes = (
|
| 228 |
-
source_data
|
| 229 |
.filter(
|
| 230 |
(pl.col('pitId') == id) &
|
| 231 |
pl.col('x').is_not_null() &
|
|
@@ -236,6 +248,5 @@ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=Non
|
|
| 236 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 237 |
)
|
| 238 |
|
| 239 |
-
pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip, group_by_team=False).filter(pl.col('pitId') == id)
|
| 240 |
-
|
| 241 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
+
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitcher_lr='both', batter_lr='both', group_by_team=False):
|
| 73 |
+
assert pitcher_lr in ('both', 'l', 'r')
|
| 74 |
+
assert batter_lr in ('both', 'l', 'r')
|
| 75 |
assert player_type in ('pitcher', 'batter')
|
| 76 |
assert pitch_class_type in ('general', 'specific')
|
| 77 |
+
|
| 78 |
+
if pitcher_lr != 'both':
|
| 79 |
+
data = data.filter(pl.col('pitLR') == pitcher_lr)
|
| 80 |
+
|
| 81 |
+
if batter_lr != 'both':
|
| 82 |
+
data = data.filter(pl.col('batLR') == batter_lr)
|
| 83 |
+
|
| 84 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 85 |
if group_by_team:
|
| 86 |
id_cols.append('pitcher_team_name_short')
|
|
|
|
| 93 |
.group_by(*id_cols, pitch_col)
|
| 94 |
.agg(
|
| 95 |
pl.first(name_col),
|
| 96 |
+
pl.col('pitLR').first().str.to_uppercase().alias('Throws'),
|
| 97 |
*([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
| 98 |
pl.first(pitch_name_col),
|
| 99 |
pl.len().alias('count'),
|
|
|
|
| 141 |
)
|
| 142 |
return pitch_stats
|
| 143 |
|
| 144 |
+
def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both', batter_lr='both', group_by_team=False):
|
| 145 |
+
assert pitcher_lr in ('both', 'l', 'r')
|
| 146 |
+
assert batter_lr in ('both', 'l', 'r')
|
| 147 |
+
|
| 148 |
data = (
|
| 149 |
compute_team_games(data)
|
| 150 |
.with_columns(
|
|
|
|
| 165 |
else:
|
| 166 |
data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
|
| 167 |
|
| 168 |
+
if pitcher_lr != 'both':
|
| 169 |
+
data = data.filter(pl.col('pitLR') == pitcher_lr)
|
| 170 |
+
|
| 171 |
+
if batter_lr != 'both':
|
| 172 |
+
data = data.filter(pl.col('batLR') == batter_lr)
|
| 173 |
+
|
| 174 |
assert player_type in ('pitcher', 'batter')
|
| 175 |
id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
|
| 176 |
if group_by_team:
|
|
|
|
| 183 |
.agg(
|
| 184 |
pl.col(name_col).first(),
|
| 185 |
*([] if group_by_team else [pl.col('pitcher_team_name_short').last()]),
|
| 186 |
+
pl.col('pitLR').first().str.to_uppercase().alias('Throws'),
|
| 187 |
pl.col('IP').first(),
|
| 188 |
pl.col('pa_code').unique().len().alias('PA'),
|
| 189 |
pl.col('FB Velo').max(),
|
|
|
|
| 226 |
return player_stats
|
| 227 |
|
| 228 |
|
| 229 |
+
def get_pitcher_stats(id, lr='both', game_kind=None, start_date=None, end_date=None, min_ip=1, min_pitches=1, pitch_class_type='specific'):
|
| 230 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
source_data = data_df
|
| 232 |
source_data = filter_data_by_date_and_game_kind(source_data, start_date=start_date, end_date=end_date, game_kind=game_kind)
|
| 233 |
|
| 234 |
+
# if lr is not None:
|
| 235 |
+
# source_data =
|
| 236 |
|
| 237 |
+
pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches, batter_lr=lr, group_by_team=False).filter(pl.col('pitId') == id)
|
| 238 |
|
| 239 |
pitch_shapes = (
|
| 240 |
+
(source_data.filter(pl.col('batLR') == lr) if lr != 'both' else source_data)
|
| 241 |
.filter(
|
| 242 |
(pl.col('pitId') == id) &
|
| 243 |
pl.col('x').is_not_null() &
|
|
|
|
| 248 |
.with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
|
| 249 |
)
|
| 250 |
|
| 251 |
+
pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip, batter_lr=lr, group_by_team=False).filter(pl.col('pitId') == id)
|
|
|
|
| 252 |
return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
|