patrickramos committed on
Commit
d0e7981
·
1 Parent(s): 9f7c1e1

Add team pitching leaderboard

Browse files
Files changed (6) hide show
  1. acknowledgements.md +1 -1
  2. app.py +6 -2
  3. data.py +1 -2
  4. pitcher_leaderboard.py +2 -2
  5. player_team_leaderboard.py +210 -0
  6. stats.py +64 -24
acknowledgements.md CHANGED
@@ -6,4 +6,4 @@ The approach to visualizing pitch locations was inspired by [Stephen-Sutton Brow
6
 
7
  The attempt to be more aesthetically-minded (ex. more conscious focus on font choice and legibility) was inspired by the hockey player cards of [JFresh](https://x.com/JFreshHockey).
8
 
9
- Thanks to [ぼーのさん](https://bo-no05.hatenadiary.org/) for answering some of my questions while making this.
 
6
 
7
  The attempt to be more aesthetically-minded (ex. more conscious focus on font choice and legibility) was inspired by the hockey player cards of [JFresh](https://x.com/JFreshHockey).
8
 
9
+ Thanks to [ぼーのさん](https://bo-no05.hatenadiary.org/) for answering some of our questions while making this.
app.py CHANGED
@@ -3,7 +3,8 @@ import matplotlib as mpl
3
 
4
  from data import data_df
5
  from pitcher_overview import create_pitcher_overview
6
- from pitcher_leaderboard import create_pitcher_leaderboard
 
7
  from pitch_leaderboard import create_pitch_leaderboard
8
  from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
9
  from css import css
@@ -16,7 +17,7 @@ with open('updated.txt') as f:
16
 
17
  limitations = '''**General Limitations**
18
  - As new players make their debut, some names may be missing or not translated/transliterated correctly.
19
- - IP is overestimated
20
  '''
21
 
22
  with open('acknowledgements.md', 'r') as f:
@@ -30,11 +31,14 @@ if __name__ == '__main__':
30
  create_pitcher_leaderboard()
31
  with gr.Tab('Pitch Leaderboard'):
32
  create_pitch_leaderboard()
 
 
33
  with gr.Tab('Daily/Weekly Leaderboard'):
34
  create_daily_weekly_leaderboard_app(data_df)
35
  with gr.Tab('Acknowledgements'):
36
  gr.Markdown(acknowledgements)
37
 
 
38
  gr.Markdown('---')
39
  gr.Markdown(f'**Data up to:** {latest_data_date}')
40
  gr.Markdown(f'**Last updated:** {updated}')
 
3
 
4
  from data import data_df
5
  from pitcher_overview import create_pitcher_overview
6
+ # from pitcher_leaderboard import create_pitcher_leaderboard
7
+ from player_team_leaderboard import create_pitcher_leaderboard, create_team_pitching_leaderboard
8
  from pitch_leaderboard import create_pitch_leaderboard
9
  from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
10
  from css import css
 
17
 
18
  limitations = '''**General Limitations**
19
  - As new players make their debut, some names may be missing or not translated/transliterated correctly.
20
+ - IP is underestimated (does not count outs via caught stealing)
21
  '''
22
 
23
  with open('acknowledgements.md', 'r') as f:
 
31
  create_pitcher_leaderboard()
32
  with gr.Tab('Pitch Leaderboard'):
33
  create_pitch_leaderboard()
34
+ with gr.Tab('Team Pitching Leaderboard'):
35
+ create_team_pitching_leaderboard()
36
  with gr.Tab('Daily/Weekly Leaderboard'):
37
  create_daily_weekly_leaderboard_app(data_df)
38
  with gr.Tab('Acknowledgements'):
39
  gr.Markdown(acknowledgements)
40
 
41
+
42
  gr.Markdown('---')
43
  gr.Markdown(f'**Data up to:** {latest_data_date}')
44
  gr.Markdown(f'**Last updated:** {updated}')
data.py CHANGED
@@ -148,7 +148,7 @@ data_df = (
148
  )
149
  .join(
150
  (
151
- aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches']]
152
  .rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
153
  ),
154
  on='universal_code',
@@ -214,7 +214,6 @@ data_df = (
214
  .filter(pl.col('ballKind_code') != '-')
215
  .unique()
216
  )
217
- bar = data_df
218
 
219
 
220
  def select_name(names):
 
148
  )
149
  .join(
150
  (
151
+ aux_df.filter(~pl.col('ibb'))[['universal_code', 'battingResult', 'inning_pas', 'pa_pitches', 'beforeBso', 'bso']]
152
  .rename({'battingResult': 'aux_bresult', 'inning_pas': 'aux_inning_pas', 'pa_pitches': 'aux_pa_pitches'})
153
  ),
154
  on='universal_code',
 
214
  .filter(pl.col('ballKind_code') != '-')
215
  .unique()
216
  )
 
217
 
218
 
219
  def select_name(names):
pitcher_leaderboard.py CHANGED
@@ -52,7 +52,7 @@ def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, qualified, pitch
52
  data, player_type='pitcher',
53
  pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
54
  batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
55
- min_ip='qualified' if qualified else min_ip,
56
  group_by_team=True
57
  )
58
  .filter(pl.col('qualified'))
@@ -156,7 +156,7 @@ def create_pitcher_leaderboard():
156
  # )
157
  # pin_columns_state = gr.State(True)
158
  pin_columns.click(
159
- lambda : gr.update(pinned_columns=None),
160
  outputs=leaderboard
161
  )
162
 
 
52
  data, player_type='pitcher',
53
  pitcher_lr='both' if pitcher_lr=='Both' else pitcher_lr[0].lower(),
54
  batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
55
+ qual='qualified' if qualified else min_ip,
56
  group_by_team=True
57
  )
58
  .filter(pl.col('qualified'))
 
156
  # )
157
  # pin_columns_state = gr.State(True)
158
  pin_columns.click(
159
+ lambda : gr.Dataframe(pinned_columns=None),
160
  outputs=leaderboard
161
  )
162
 
player_team_leaderboard.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import polars as pl
3
+ import numpy as np
4
+
5
+ from datetime import datetime
6
+ from functools import partial
7
+
8
+ from data import data_df
9
+ from stats import compute_player_stats, filter_data_by_date_and_game_kind
10
+ from convert import team_names_short_to_color, get_text_color_from_team
11
+ from plotting import stat_cmap
12
+
13
+
14
+ TEAMS = [
15
+ 'Yomiuri',
16
+ 'Yakult',
17
+ 'DeNA',
18
+ 'Chunichi',
19
+ 'Hanshin',
20
+ 'Hiroshima',
21
+ 'Nipponham',
22
+ 'Rakuten',
23
+ 'Seibu',
24
+ 'Lotte',
25
+ 'ORIX',
26
+ 'SoftBank'
27
+ ]
28
+
29
+
30
+ notes = '''**Limitations**
31
+ - [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
32
+
33
+ **To-do**
34
+ - Fix IP calculation
35
+ - Add post-season
36
+ '''
37
+
38
+
39
def get_col_width(col: str, player_team_type: str) -> int:
    """Return the display width for leaderboard column ``col``.

    The result is used to build the ``column_widths`` list handed to the
    leaderboard ``gr.DataFrame``.

    Args:
        col: Column header as shown in the leaderboard (e.g. ``'Team'``).
        player_team_type: Leaderboard flavour; only ``'team pitching'``
            changes the result, and only for the ``'IP'`` column.

    Returns:
        A fixed width for the identifying columns, otherwise a width that
        grows with the header length and never drops below 50.
    """
    if col in ('Pitcher', 'Batter'):
        return 125
    if col == 'Team':
        return 75
    if col in ('Throws', 'Bats'):
        return 60
    if col == 'IP':
        # The team leaderboard gets a slightly wider IP column
        # (presumably because team totals have more digits).
        return 60 if player_team_type == 'team pitching' else 50
    # Generic stat column: ten units per header character, minimum 50.
    return max(50, 10 * len(col))
51
+
52
+
53
def _leaderboard_display_value(col, item, pct_stats):
    """Format a single leaderboard cell for display.

    Percentage stats get one decimal and a ``%`` suffix, other floats get one
    decimal, and everything else is passed through unchanged. Missing values
    (``None``) become an empty string instead of raising in the format spec.
    """
    if item is None:
        return ''
    if col in pct_stats:
        return f'{item:.1f}%'
    if isinstance(item, float):
        return f'{item:.1f}'
    return item


def create_player_team_leaderboard_app(player_team_type):
    """Build the Gradio leaderboard app for one player/team stat type.

    Args:
        player_team_type: Forwarded to ``compute_player_stats`` as
            ``player_type``. ``'pitcher'`` and ``'team pitching'`` select the
            pitching column set; any other value selects the batting one.
            Values containing ``'team'`` yield one row per team instead of
            one row per player.

    Returns:
        The ``gr.Blocks`` app containing the filters, the search button and
        the leaderboard table.
    """
    pitching = player_team_type in ('pitcher', 'team pitching')
    team = 'team' in player_team_type

    # stats
    rate_stats = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
    location_stats = ['Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
    # Stats stored as fractions that are shown as percentages.
    pct_stats = rate_stats + ['Zone%'] + location_stats
    if pitching:
        # Stats that have a matching '<stat>_pctl' column used for cell colouring.
        stats_with_pctls = ['FB Velo'] + rate_stats + ['Zone%']
        cols = ['Pitcher', 'Team', 'Throws', 'IP', 'TBF', 'FB Velo'] + rate_stats + ['Zone%'] + location_stats
        player_only_cols = ('Pitcher', 'Throws')
    else:
        stats_with_pctls = rate_stats + ['Zone%']
        cols = ['Batter', 'Team', 'Bats', 'PA'] + rate_stats
        player_only_cols = ('Batter', 'Bats')
    if team:
        # Team rows have no player name or handedness.
        cols = [col for col in cols if col not in player_only_cols]

    # col names
    player_type = 'pitcher' if pitching else 'batter'
    id_col = f'{player_type[:3].lower()}Id' if not team else f'{player_type}_team_name_short'
    qual_name = 'IP' if pitching else 'PA'

    def gr_create_player_team_leaderboard(start_date, end_date, min_qual, qualified, pitcher_lr='Both', batter_lr='Both', include_teams=None):
        """Compute the leaderboard and return it as a ``gr.DataFrame`` value.

        Returns a dict with the raw rows (``data``), the column ``headers``
        and per-cell ``styling`` / ``display_value`` metadata.
        """
        assert pitcher_lr in ['Both', 'Left', 'Right']
        assert batter_lr in ['Both', 'Left', 'Right']

        data = data_df.filter(pl.col('ballKind_code') != '-')
        data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')

        rename = {f'{player_type}_team_name_short': 'Team'}
        if not team:
            rename[f'{player_type}_name'] = player_type.title()
        if pitching:
            rename['PA'] = 'TBF'

        # compute_player_stats accepts either the string 'qualified' or a
        # numeric minimum as `qual`; the two are kept as separate inputs here
        # for API compatibility.
        pitcher_stats = (
            compute_player_stats(
                data, player_type=player_team_type,
                pitcher_lr='both' if pitcher_lr == 'Both' else pitcher_lr[0].lower(),
                batter_lr='both' if batter_lr == 'Both' else batter_lr[0].lower(),
                qual='qualified' if qualified else min_qual,
                group_by_team=not team
            )
            .filter(pl.col('qualified'))
            .drop(['qualified'] + ([id_col] if not team else []))
            .rename(rename)
            .with_columns(
                pl.col(stat).mul(100)
                for stat in pct_stats
            )
        )

        if include_teams is not None:
            pitcher_stats = pitcher_stats.filter(pl.col('Team').is_in(include_teams))

        # Select the displayed columns once instead of once per row.
        table = pitcher_stats[cols]
        pctls = {
            col: pitcher_stats[f'{col}_pctl'].to_list()
            for col in cols
            if col in stats_with_pctls
        }

        styling = []
        for i, row in enumerate(table.iter_rows()):
            styling_row = []
            for col, item in zip(cols, row):
                if col in pctls:
                    pctl = pctls[col][i]
                    if pctl is None:
                        # No percentile (stat missing for this row): leave the
                        # cell unstyled rather than passing None to the colormap.
                        styling_row.append('')
                    else:
                        # assumes stat_cmap returns an (n, 4) RGBA array in [0, 1] — TODO confirm
                        r, g, b = (stat_cmap([pctl])[0, :3]*255).astype(np.uint8)
                        styling_row.append(f'background-color: rgba({r}, {g}, {b})')
                elif col == 'Team':
                    styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
                else:
                    styling_row.append('')
            styling.append(styling_row)

        display_value = [
            [_leaderboard_display_value(col, item, pct_stats) for col, item in zip(cols, row)]
            for row in table.iter_rows()
        ]

        return {
            'data': table.rows(),
            'headers': cols,
            'metadata': {
                'styling': styling,
                'display_value': display_value,
            }
        }

    def toggle_all_teams(_teams):
        """Clear the selection when every team is ticked, otherwise tick all."""
        # Compare as sets so the result does not depend on the order in which
        # the boxes were ticked.
        return [] if set(_teams or []) == set(TEAMS) else TEAMS

    now = datetime.now()
    start_datetime_init = datetime(now.year, 1, 1)
    end_datetime_init = now
    with gr.Blocks() as app:
        gr.Markdown(f'# {player_team_type.title()} Leaderboard')
        with gr.Row():
            start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
            end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
        with gr.Row():
            if not team:
                with gr.Group():
                    min_ip = gr.Number(100, label=f'Min. {qual_name}', precision=0, minimum=0, interactive=False)
                    qualified = gr.Checkbox(True, label='Qualified')
            else:
                # Team leaderboards have no qualification filter: feed fixed
                # values to the callback through hidden state.
                min_ip = gr.State(0)
                qualified = gr.State(False)
            with gr.Group():
                pitcher_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Pitcher handedness')
                batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
        with gr.Row():
            include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
            all_teams = gr.Button('Select/Deselect all teams')

        search = gr.Button('Search')
        pin_columns = gr.Checkbox(True, label='Pin columns')
        leaderboard = gr.DataFrame(
            # Empty placeholder whose headers match `cols` (and therefore the
            # column_widths list below) until the first search is run.
            pl.DataFrame({col: [] for col in cols}),
            column_widths=[get_col_width(col, player_team_type) for col in cols],
            show_copy_button=True,
            show_search='filter',
            pinned_columns=2,
            elem_id='leaderboard'
        )

        gr.Markdown(notes)

        search.click(gr_create_player_team_leaderboard, inputs=[start_date, end_date, min_ip, qualified, pitcher_lr, batter_lr, include_teams], outputs=leaderboard)
        all_teams.click(toggle_all_teams, inputs=include_teams, outputs=include_teams)
        if not team:
            # The numeric minimum is only editable while "Qualified" is unticked.
            qualified.change(lambda qualified: gr.Number(interactive=not qualified), inputs=qualified, outputs=min_ip)
        pin_columns.input(
            lambda pin: gr.DataFrame(pinned_columns=2 if pin else None),
            inputs=pin_columns,
            outputs=leaderboard
        )

    return app
197
+
198
# Zero-argument app factories, one per leaderboard flavour, so callers can
# build a tab with a plain call such as `create_pitcher_leaderboard()`.
create_pitcher_leaderboard = partial(
    create_player_team_leaderboard_app,
    player_team_type='pitcher'
)

create_team_pitching_leaderboard = partial(
    create_player_team_leaderboard_app,
    player_team_type='team pitching'
)

if __name__ == '__main__':
    # Standalone entry point: serve the pitcher leaderboard on its own.
    # (The previous `foo()` call referenced an undefined name.)
    app = create_pitcher_leaderboard()
    app.launch()
stats.py CHANGED
@@ -75,6 +75,8 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
75
  assert player_type in ('pitcher', 'batter')
76
  assert pitch_class_type in ('general', 'specific')
77
 
 
 
78
  if pitcher_lr != 'both':
79
  data = data.filter(pl.col('pitLR') == pitcher_lr)
80
 
@@ -82,8 +84,9 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
82
  data = data.filter(pl.col('batLR') == batter_lr)
83
 
84
  id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
 
85
  if group_by_team:
86
- id_cols.append('pitcher_team_name_short')
87
  name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
88
  pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
89
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
@@ -141,15 +144,32 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, pitc
141
  )
142
  return pitch_stats
143
 
144
- def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both', batter_lr='both', group_by_team=False):
145
  assert pitcher_lr in ('both', 'l', 'r')
146
  assert batter_lr in ('both', 'l', 'r')
147
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  data = (
149
  compute_team_games(data)
150
  .with_columns(
151
  pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
152
- pl.col('inning_code').unique().len().over('pitId').alias('IP')
 
 
153
  # pl.col('presult').is_in(verify_and_return_presult([
154
  # 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
155
  # 'Foul fly', 'Foul line (?)',
@@ -160,32 +180,52 @@ def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both
160
  )
161
  )
162
 
163
- if min_ip == 'qualified':
164
- data = data.with_columns((pl.col('IP') >= pl.col('games')).alias('qualified'))
 
 
 
165
  else:
166
- data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
167
 
168
- if pitcher_lr != 'both':
169
- data = data.filter(pl.col('pitLR') == pitcher_lr)
170
-
171
- if batter_lr != 'both':
172
- data = data.filter(pl.col('batLR') == batter_lr)
173
 
174
- assert player_type in ('pitcher', 'batter')
175
- id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
176
- if group_by_team:
177
- id_cols.append('pitcher_team_name_short')
178
- name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  player_stats = (
180
  data
181
  .with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
182
  .group_by(id_cols)
183
  .agg(
184
- pl.col(name_col).first(),
185
- *([] if group_by_team else [pl.col('pitcher_team_name_short').last()]),
186
- pl.col('pitLR').first().str.to_uppercase().alias('Throws'),
 
 
 
 
187
  pl.col('IP').first(),
188
- pl.col('pa_code').unique().len().alias('PA'),
189
  pl.col('FB Velo').max(),
190
  (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
191
  (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
@@ -218,10 +258,10 @@ def compute_player_stats(data, player_type, min_ip='qualified', pitcher_lr='both
218
  )
219
  .drop('G', 'F', 'B', 'P', 'L')
220
  .with_columns(
221
- (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat in ['BB%', 'FB%', 'LD%'] or 'Contact%' in stat)/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
222
  for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
223
  )
224
- .sort('IP', descending=True)
225
  )
226
  return player_stats
227
 
@@ -248,5 +288,5 @@ def get_pitcher_stats(id, lr='both', game_kind=None, start_date=None, end_date=N
248
  .with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
249
  )
250
 
251
- pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip, batter_lr=lr, group_by_team=False).filter(pl.col('pitId') == id)
252
  return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
 
75
  assert player_type in ('pitcher', 'batter')
76
  assert pitch_class_type in ('general', 'specific')
77
 
78
+ pitching = player_type in ('pitcher', )
79
+
80
  if pitcher_lr != 'both':
81
  data = data.filter(pl.col('pitLR') == pitcher_lr)
82
 
 
84
  data = data.filter(pl.col('batLR') == batter_lr)
85
 
86
  id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
87
+ team_col = 'pitcher_team_name_short' if pitching else 'batter_team_name_short'
88
  if group_by_team:
89
+ id_cols.append(team_col)
90
  name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
91
  pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
92
  pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
 
144
  )
145
  return pitch_stats
146
 
147
+ def compute_player_stats(data, player_type, qual='qualified', pitcher_lr='both', batter_lr='both', group_by_team=False):
148
  assert pitcher_lr in ('both', 'l', 'r')
149
  assert batter_lr in ('both', 'l', 'r')
150
+ assert player_type in ('pitcher', 'batter', 'team pitching', 'team batting')
151
+
152
+ # pitching or batting, player or team
153
+ pitching = player_type in ('pitcher', 'team pitching')
154
+ team = player_type in ('team pitching', 'team batting')
155
+
156
+ # handedness filters
157
+ if pitcher_lr != 'both':
158
+ data = data.filter(pl.col('pitLR') == pitcher_lr)
159
+ if batter_lr != 'both':
160
+ data = data.filter(pl.col('batLR') == batter_lr)
161
+
162
+ if pitching:
163
+ over_col = 'pitId' if not team else 'pitcher_team_name_short'
164
+ else:
165
+ over_col = 'batId' if not team else 'batter_team_name_short'
166
  data = (
167
  compute_team_games(data)
168
  .with_columns(
169
  pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
170
+ # pl.col('inning_code').unique().len().over(over_col).alias('IP'),
171
+ (pl.col('bso').struct.field('o').cast(pl.Int32) - pl.col('beforeBso').struct.field('o').cast(pl.Int32)).sum().mul(1/3).over(over_col).alias('IP'),
172
+ pl.col('pa_code').unique().len().over(over_col).alias('PA'),
173
  # pl.col('presult').is_in(verify_and_return_presult([
174
  # 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
175
  # 'Foul fly', 'Foul line (?)',
 
180
  )
181
  )
182
 
183
+ # qualifiers
184
+ qualified_factor = 1 if pitching else 3.1
185
+ qual_col = 'IP' if pitching else 'PA'
186
+ if qual == 'qualified':
187
+ data = data.with_columns((pl.col(qual_col) >= qualified_factor * pl.col('games')).alias('qualified'))
188
  else:
189
+ data = data.with_columns((pl.col(qual_col) >= qual).alias('qualified'))
190
 
191
+ # percentile ascending/descending
192
+ if pitching:
193
+ stat_descending_pctl = lambda stat: stat in ['BB%', 'FB%', 'LD%', 'Z-Swing%'] or 'Contact%' in stat
194
+ else:
195
+ stat_descending_pctl = lambda stat: not (stat in ['BB%', 'FB%', 'LD%', 'Swing%', 'Z-Swing%'] or 'Contact%' in stat)
196
 
197
+ # col names
198
+ match player_type:
199
+ case 'pitcher':
200
+ id_cols = ['pitId']
201
+ name_col = 'pitcher_name'
202
+ case 'batter':
203
+ id_cols = ['batId']
204
+ name_col = 'batter_name'
205
+ case _:
206
+ id_cols = []
207
+ name_col = None
208
+
209
+ team_col = 'pitcher_team_name_short' if pitching else 'batter_team_name_short'
210
+ if group_by_team or team:
211
+ id_cols.append(team_col)
212
+
213
+ handedness_col = 'pitLR' if pitching else 'batLR'
214
+ new_handedness_col = 'Throws' if pitching else 'Bats'
215
  player_stats = (
216
  data
217
  .with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
218
  .group_by(id_cols)
219
  .agg(
220
+ *([pl.col(name_col).first()] if not team else []),
221
+ *([] if group_by_team or team else [pl.col(team_col).last()]),
222
+ *(
223
+ [pl.col(handedness_col).first().str.to_uppercase().alias(new_handedness_col) ]
224
+ if not (team and ((pitcher_lr == 'both') if pitching else (batter_lr == 'both')))
225
+ else []
226
+ ),
227
  pl.col('IP').first(),
228
+ pl.col('PA').first(),
229
  pl.col('FB Velo').max(),
230
  (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
231
  (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
 
258
  )
259
  .drop('G', 'F', 'B', 'P', 'L')
260
  .with_columns(
261
+ (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat_descending_pctl(stat))/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
262
  for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
263
  )
264
+ .sort(qual_col, descending=True)
265
  )
266
  return player_stats
267
 
 
288
  .with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
289
  )
290
 
291
+ pitcher_stats = compute_player_stats(source_data, player_type='pitcher', qual=min_ip, batter_lr=lr, group_by_team=False).filter(pl.col('pitId') == id)
292
  return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)