Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Commit · 0b50ce4
	1 Parent(s): 48dfbf7
								
Add pitcher leaderboard
Browse files
- app.py +4 -0
- convert.py +5 -0
- pitch_leaderboard.py +6 -25
- pitcher_leaderboard.py +158 -0
- plotting.py +1 -1
- stats.py +49 -13
    	
        app.py
    CHANGED
    
    | @@ -3,6 +3,7 @@ import matplotlib as mpl | |
| 3 |  | 
| 4 | 
             
            from data import data_df
         | 
| 5 | 
             
            from pitcher_overview import create_pitcher_overview
         | 
|  | |
| 6 | 
             
            from pitch_leaderboard import create_pitch_leaderboard
         | 
| 7 | 
             
            from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
         | 
| 8 | 
             
            from css import css
         | 
| @@ -15,6 +16,7 @@ with open('updated.txt') as f: | |
| 15 |  | 
| 16 | 
             
            limitations = '''**General Limitations**
         | 
| 17 | 
             
            - As new players make their debut, some names may be missing or not translated/transliterated correctly.
         | 
|  | |
| 18 | 
             
            '''
         | 
| 19 |  | 
| 20 | 
             
            with open('acknowledgements.md', 'r') as f:
         | 
| @@ -24,6 +26,8 @@ if __name__ == '__main__': | |
| 24 | 
             
              with gr.Blocks(css=css) as app:
         | 
| 25 | 
             
                with gr.Tab('Pitcher Overview'):
         | 
| 26 | 
             
                  create_pitcher_overview(data_df)
         | 
|  | |
|  | |
| 27 | 
             
                with gr.Tab('Pitch Leaderboard'):
         | 
| 28 | 
             
                  create_pitch_leaderboard()
         | 
| 29 | 
             
                with gr.Tab('Daily/Weekly Leaderboard'):
         | 
|  | |
| 3 |  | 
| 4 | 
             
            from data import data_df
         | 
| 5 | 
             
            from pitcher_overview import create_pitcher_overview
         | 
| 6 | 
            +
            from pitcher_leaderboard import create_pitcher_leaderboard
         | 
| 7 | 
             
            from pitch_leaderboard import create_pitch_leaderboard
         | 
| 8 | 
             
            from daily_weekly_leaderboard import create_daily_weekly_leaderboard_app
         | 
| 9 | 
             
            from css import css
         | 
|  | |
| 16 |  | 
| 17 | 
             
            limitations = '''**General Limitations**
         | 
| 18 | 
             
            - As new players make their debut, some names may be missing or not translated/transliterated correctly.
         | 
| 19 | 
            +
            - IP is overestimated
         | 
| 20 | 
             
            '''
         | 
| 21 |  | 
| 22 | 
             
            with open('acknowledgements.md', 'r') as f:
         | 
|  | |
| 26 | 
             
              with gr.Blocks(css=css) as app:
         | 
| 27 | 
             
                with gr.Tab('Pitcher Overview'):
         | 
| 28 | 
             
                  create_pitcher_overview(data_df)
         | 
| 29 | 
            +
                with gr.Tab('Pitcher Leaderboard'):
         | 
| 30 | 
            +
                  create_pitcher_leaderboard()
         | 
| 31 | 
             
                with gr.Tab('Pitch Leaderboard'):
         | 
| 32 | 
             
                  create_pitch_leaderboard()
         | 
| 33 | 
             
                with gr.Tab('Daily/Weekly Leaderboard'):
         | 
    	
        convert.py
    CHANGED
    
    | @@ -161,6 +161,11 @@ presult = { | |
| 161 | 
             
                141: 'Unknown'
         | 
| 162 | 
             
            }
         | 
| 163 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 164 | 
             
            bresult = {
         | 
| 165 | 
             
                0: '空振り三振',
         | 
| 166 | 
             
                1: '単打', # 1b gb p
         | 
|  | |
| 161 | 
             
                141: 'Unknown'
         | 
| 162 | 
             
            }
         | 
| 163 |  | 
| 164 | 
            +
            def verify_and_return_presult(presults):
         | 
| 165 | 
            +
              for _presult in presults:
         | 
| 166 | 
            +
                assert _presult in presult.values(), f'{_presult} is invalid'
         | 
| 167 | 
            +
              return presults
         | 
| 168 | 
            +
             | 
| 169 | 
             
            bresult = {
         | 
| 170 | 
             
                0: '空振り三振',
         | 
| 171 | 
             
                1: '単打', # 1b gb p
         | 
    	
        pitch_leaderboard.py
    CHANGED
    
    | @@ -32,6 +32,9 @@ TEAMS  = [ | |
| 32 | 
             
            ]
         | 
| 33 | 
             
            notes = '''**Limitations**
         | 
| 34 | 
             
            - [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
         | 
|  | |
|  | |
|  | |
| 35 | 
             
            '''
         | 
| 36 |  | 
| 37 |  | 
| @@ -44,29 +47,8 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B | |
| 44 | 
             
              if pitcher_lr != 'Both':
         | 
| 45 | 
             
                data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
         | 
| 46 |  | 
| 47 | 
            -
              # both, left, right = [
         | 
| 48 | 
            -
                # (
         | 
| 49 | 
            -
                  # compute_pitch_stats(df, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
         | 
| 50 | 
            -
                  # .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
         | 
| 51 | 
            -
                  # .drop('qualified')
         | 
| 52 | 
            -
                  # .rename({'pitcher_name': 'Pitcher', 'count': 'Count', 'usage': 'Usage', 'ballKind': 'Pitch', 'general_ballKind': 'Pitch (General)'} | {f'{stat}_pctl': f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS})
         | 
| 53 | 
            -
                  # .with_columns(
         | 
| 54 | 
            -
                    # pl.col(stat).mul(100).round(1)
         | 
| 55 | 
            -
                    # for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
         | 
| 56 | 
            -
                  # )
         | 
| 57 | 
            -
                  # [['pitId', 'ballKind_code', 'Pitcher', 'Pitch', 'Pitch (General)', 'Count', 'Usage'] + STATS_WITH_PCTLS]
         | 
| 58 | 
            -
                # )
         | 
| 59 | 
            -
                # for df
         | 
| 60 | 
            -
                # in [data, data.filter(pl.col('batLR') == 'l'), data.filter(pl.col('batLR') == 'r')]
         | 
| 61 | 
            -
              # ]
         | 
| 62 | 
            -
              # pitch_stats = (
         | 
| 63 | 
            -
                # both
         | 
| 64 | 
            -
                # .join(left, on=['pitId', 'ballKind_code'], suffix=' (LHH)', how='full')
         | 
| 65 | 
            -
                # .join(right, on=['pitId', 'ballKind_code'], suffix=' (RHH)', how='full')
         | 
| 66 | 
            -
                # .drop('pitId', 'ballKind_code', *list(chain.from_iterable([[f'{col} ({handedness}HH)' for col in ['pitId', 'ballKind_code', 'Pitcher', 'Pitch', 'Pitch (General)']] for handedness in ('L', 'R')])))
         | 
| 67 | 
            -
              # )
         | 
| 68 | 
             
              pitch_stats = (
         | 
| 69 | 
            -
                    compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific')
         | 
| 70 | 
             
                    .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
         | 
| 71 | 
             
                    .drop('pitId', 'ballKind_code', 'qualified')
         | 
| 72 | 
             
                    .rename({
         | 
| @@ -85,7 +67,6 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr='B | |
| 85 | 
             
                      pl.col(stat).mul(100)
         | 
| 86 | 
             
                      for stat in PCT_STATS
         | 
| 87 | 
             
                    )
         | 
| 88 | 
            -
                    # [['Pitcher', 'Team', 'Pitch', 'Pitch (General)'] + STATS + [f'{stat}_pctl' for stat in STATS_WITH_PCTLS]]
         | 
| 89 | 
             
              )
         | 
| 90 |  | 
| 91 | 
             
              if include_teams is not None:
         | 
| @@ -146,7 +127,7 @@ def create_pitch_leaderboard(): | |
| 146 | 
             
                  with gr.Column(scale=1):
         | 
| 147 | 
             
                    all_pitches = gr.Button('Select/Deselect all pitches')
         | 
| 148 | 
             
                    min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
         | 
| 149 | 
            -
                     | 
| 150 | 
             
                with gr.Row():
         | 
| 151 | 
             
                  include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
         | 
| 152 | 
             
                  all_teams = gr.Button('Select/Deselect all teams')
         | 
| @@ -164,7 +145,7 @@ def create_pitch_leaderboard(): | |
| 164 |  | 
| 165 | 
             
                gr.Markdown(notes)
         | 
| 166 |  | 
| 167 | 
            -
                search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches,  | 
| 168 | 
             
                all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
         | 
| 169 | 
             
                all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
         | 
| 170 | 
             
                # pin_columns.input(
         | 
|  | |
| 32 | 
             
            ]
         | 
| 33 | 
             
            notes = '''**Limitations**
         | 
| 34 | 
             
            - [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            **To-do**
         | 
| 37 | 
            +
            - Add post-season
         | 
| 38 | 
             
            '''
         | 
| 39 |  | 
| 40 |  | 
|  | |
| 47 | 
             
              if pitcher_lr != 'Both':
         | 
| 48 | 
             
                data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
         | 
| 49 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 50 | 
             
              pitch_stats = (
         | 
| 51 | 
            +
                    compute_pitch_stats(data, player_type='pitcher', min_pitches=min_pitches, pitch_class_type='specific', group_by_team=True)
         | 
| 52 | 
             
                    .filter(pl.col('qualified') & (pl.col('ballKind').is_in(include_pitches)))
         | 
| 53 | 
             
                    .drop('pitId', 'ballKind_code', 'qualified')
         | 
| 54 | 
             
                    .rename({
         | 
|  | |
| 67 | 
             
                      pl.col(stat).mul(100)
         | 
| 68 | 
             
                      for stat in PCT_STATS
         | 
| 69 | 
             
                    )
         | 
|  | |
| 70 | 
             
              )
         | 
| 71 |  | 
| 72 | 
             
              if include_teams is not None:
         | 
|  | |
| 127 | 
             
                  with gr.Column(scale=1):
         | 
| 128 | 
             
                    all_pitches = gr.Button('Select/Deselect all pitches')
         | 
| 129 | 
             
                    min_pitches = gr.Number(100, label='Min. Pitches', precision=0, minimum=0)
         | 
| 130 | 
            +
                    batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
         | 
| 131 | 
             
                with gr.Row():
         | 
| 132 | 
             
                  include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
         | 
| 133 | 
             
                  all_teams = gr.Button('Select/Deselect all teams')
         | 
|  | |
| 145 |  | 
| 146 | 
             
                gr.Markdown(notes)
         | 
| 147 |  | 
| 148 | 
            +
                search.click(gr_create_pitch_leaderboard, inputs=[start_date, end_date, min_pitches, batter_lr, include_pitches, include_teams], outputs=leaderboard)
         | 
| 149 | 
             
                all_pitches.click(lambda _pitch_types : [] if _pitch_types == PITCH_TYPES else PITCH_TYPES, inputs=include_pitches, outputs=include_pitches)
         | 
| 150 | 
             
                all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
         | 
| 151 | 
             
                # pin_columns.input(
         | 
    	
        pitcher_leaderboard.py
    ADDED
    
    | @@ -0,0 +1,158 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            import polars as pl
         | 
| 3 | 
            +
            import numpy as np
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            from datetime import datetime
         | 
| 6 | 
            +
            # from itertools import chain
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from data import data_df
         | 
| 9 | 
            +
            from stats import compute_player_stats, filter_data_by_date_and_game_kind
         | 
| 10 | 
            +
            from convert import team_names_short_to_color, get_text_color_from_team
         | 
| 11 | 
            +
            from plotting import stat_cmap
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            STATS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
         | 
| 14 | 
            +
            PCT_STATS = ['K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%', 'MM%']
         | 
| 15 | 
            +
            STATS_WITH_PCTLS = ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
         | 
| 16 | 
            +
            COLUMNS = ['Pitcher', 'Team', 'IP', 'TBF'] + STATS
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            TEAMS  = [
         | 
| 19 | 
            +
                'Yomiuri',
         | 
| 20 | 
            +
                'Yakult',
         | 
| 21 | 
            +
                'DeNA',
         | 
| 22 | 
            +
                'Chunichi',
         | 
| 23 | 
            +
                'Hanshin',
         | 
| 24 | 
            +
                'Hiroshima',
         | 
| 25 | 
            +
                'Nipponham',
         | 
| 26 | 
            +
                'Rakuten',
         | 
| 27 | 
            +
                'Seibu',
         | 
| 28 | 
            +
                'Lotte',
         | 
| 29 | 
            +
                'ORIX',
         | 
| 30 | 
            +
                'SoftBank'
         | 
| 31 | 
            +
            ]
         | 
| 32 | 
            +
            notes = '''**Limitations**
         | 
| 33 | 
            +
            - [Column widths get messed up when filtering](https://github.com/gradio-app/gradio/issues/11564)
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            **To-do**
         | 
| 36 | 
            +
            - Fix IP calculation
         | 
| 37 | 
            +
            - Add post-season
         | 
| 38 | 
            +
            '''
         | 
| 39 | 
            +
             | 
| 40 | 
            +
             | 
| 41 | 
            +
            def gr_create_pitcher_leaderboard(start_date, end_date, min_ip, pitcher_lr='Both', include_teams=None):
         | 
| 42 | 
            +
              assert pitcher_lr in ['Both', 'Left', 'Right']
         | 
| 43 | 
            +
             | 
| 44 | 
            +
              data = data_df.filter(pl.col('ballKind_code') != '-')
         | 
| 45 | 
            +
              
         | 
| 46 | 
            +
              data = filter_data_by_date_and_game_kind(data, start_date=start_date, end_date=end_date, game_kind='Regular Season')
         | 
| 47 | 
            +
              if pitcher_lr != 'Both':
         | 
| 48 | 
            +
                data = data.filter(pl.col('batLR') == pitcher_lr[0].lower())
         | 
| 49 | 
            +
                
         | 
| 50 | 
            +
              pitcher_stats = (
         | 
| 51 | 
            +
                    compute_player_stats(data, player_type='pitcher', min_ip=min_ip, group_by_team=True)
         | 
| 52 | 
            +
                    .filter(pl.col('qualified'))
         | 
| 53 | 
            +
                    .drop('pitId', 'qualified')
         | 
| 54 | 
            +
                    .rename({
         | 
| 55 | 
            +
                      'pitcher_name': 'Pitcher', 
         | 
| 56 | 
            +
                      'pitcher_team_name_short': 'Team', 
         | 
| 57 | 
            +
                      'PA': 'TBF'
         | 
| 58 | 
            +
                    })
         | 
| 59 | 
            +
                    .with_columns(
         | 
| 60 | 
            +
                      pl.col(stat).mul(100)
         | 
| 61 | 
            +
                      for stat in PCT_STATS
         | 
| 62 | 
            +
                    )
         | 
| 63 | 
            +
              )
         | 
| 64 | 
            +
             | 
| 65 | 
            +
              if include_teams is not None:
         | 
| 66 | 
            +
                pitcher_stats = pitcher_stats.filter(pl.col('Team').is_in(include_teams))
         | 
| 67 | 
            +
             | 
| 68 | 
            +
              styling = []
         | 
| 69 | 
            +
              for i, row in enumerate(pitcher_stats[COLUMNS].iter_rows()):
         | 
| 70 | 
            +
                styling_row = []
         | 
| 71 | 
            +
                for col, item in zip(pitcher_stats[COLUMNS].columns, row):
         | 
| 72 | 
            +
                  _styling = 'font-size: 0.75em; '
         | 
| 73 | 
            +
                  if col in STATS_WITH_PCTLS:
         | 
| 74 | 
            +
                    r, g, b = (stat_cmap([pitcher_stats[f'{col}_pctl'][i]])[0, :3]*255).astype(np.uint8)
         | 
| 75 | 
            +
                    styling_row.append(f'background-color: rgba({r}, {g}, {b})')
         | 
| 76 | 
            +
                  elif col == 'Team':
         | 
| 77 | 
            +
                    styling_row.append(f'color: {get_text_color_from_team(item)}; background-color: {team_names_short_to_color[item]}')
         | 
| 78 | 
            +
                  else:
         | 
| 79 | 
            +
                    styling_row.append('')
         | 
| 80 | 
            +
                styling.append(styling_row)
         | 
| 81 | 
            +
             | 
| 82 | 
            +
              display_value = []
         | 
| 83 | 
            +
              for row in pitcher_stats[COLUMNS].iter_rows():
         | 
| 84 | 
            +
                display_value_row = []
         | 
| 85 | 
            +
                for col, item in zip(COLUMNS, row):
         | 
| 86 | 
            +
                  if col in PCT_STATS:
         | 
| 87 | 
            +
                    display_value_row.append(f'{item:.1f}%')
         | 
| 88 | 
            +
                  elif isinstance(item, float):
         | 
| 89 | 
            +
                    display_value_row.append(f'{item:.1f}')
         | 
| 90 | 
            +
                  else:
         | 
| 91 | 
            +
                    display_value_row.append(item)
         | 
| 92 | 
            +
                display_value.append(display_value_row)
         | 
| 93 | 
            +
             | 
| 94 | 
            +
              value = {
         | 
| 95 | 
            +
                  'data': pitcher_stats[COLUMNS].rows(),
         | 
| 96 | 
            +
                  'headers': COLUMNS,
         | 
| 97 | 
            +
                  'metadata': {
         | 
| 98 | 
            +
                      'styling': styling,
         | 
| 99 | 
            +
                      'display_value': display_value,
         | 
| 100 | 
            +
                  }
         | 
| 101 | 
            +
              }
         | 
| 102 | 
            +
              
         | 
| 103 | 
            +
              return value
         | 
| 104 | 
            +
             | 
| 105 | 
            +
             | 
| 106 | 
            +
            def create_pitcher_leaderboard():
         | 
| 107 | 
            +
              now = datetime.now()
         | 
| 108 | 
            +
              start_datetime_init = datetime(now.year, 1, 1)
         | 
| 109 | 
            +
              end_datetime_init = now
         | 
| 110 | 
            +
              with gr.Blocks() as app:
         | 
| 111 | 
            +
                gr.Markdown('# Pitch Leaderboard')
         | 
| 112 | 
            +
                with gr.Row():
         | 
| 113 | 
            +
                  start_date = gr.DateTime(start_datetime_init, include_time=False, type='datetime', label='Start')
         | 
| 114 | 
            +
                  end_date = gr.DateTime(end_datetime_init, include_time=False, type='datetime', label='End')
         | 
| 115 | 
            +
                with gr.Row():
         | 
| 116 | 
            +
                  with gr.Group():
         | 
| 117 | 
            +
                    min_ip_state = gr.State('qualified')
         | 
| 118 | 
            +
                    min_ip = gr.Number(100, label='Min. IP', precision=0, minimum=0, interactive=False)
         | 
| 119 | 
            +
                    qualified = gr.Checkbox(True, label='Qualified')
         | 
| 120 | 
            +
                  batter_lr = gr.Radio(['Both', 'Left', 'Right'], value='Both', label='Batter handedness')
         | 
| 121 | 
            +
                with gr.Row():
         | 
| 122 | 
            +
                  include_teams = gr.CheckboxGroup(TEAMS, value=TEAMS, label='Teams', scale=3)
         | 
| 123 | 
            +
                  all_teams = gr.Button('Select/Deselect all teams')
         | 
| 124 | 
            +
                    
         | 
| 125 | 
            +
                search = gr.Button('Search')
         | 
| 126 | 
            +
                pin_columns = gr.Button('Pin columns')
         | 
| 127 | 
            +
                leaderboard = gr.DataFrame(
         | 
| 128 | 
            +
                  pl.DataFrame({'Pitcher': [], 'Pitch': []}),
         | 
| 129 | 
            +
                  column_widths=[125, 75, 50, 50] + [max(50, 10*len(stat)) for stat in STATS],
         | 
| 130 | 
            +
                  show_copy_button=True,
         | 
| 131 | 
            +
                  show_search='filter',
         | 
| 132 | 
            +
                  pinned_columns=2,
         | 
| 133 | 
            +
                  elem_id='leaderboard'
         | 
| 134 | 
            +
                )
         | 
| 135 | 
            +
             | 
| 136 | 
            +
                gr.Markdown(notes)
         | 
| 137 | 
            +
             | 
| 138 | 
            +
                search.click(gr_create_pitcher_leaderboard, inputs=[start_date, end_date, min_ip_state, batter_lr, include_teams], outputs=leaderboard)
         | 
| 139 | 
            +
                all_teams.click(lambda _teams : [] if _teams == TEAMS else TEAMS, inputs=include_teams, outputs=include_teams)
         | 
| 140 | 
            +
                min_ip_state_kwargs = dict(fn=lambda min_ip, qualified: (qualified if qualified else min_ip, gr.Number(interactive=not qualified)), inputs=[min_ip, qualified], outputs=[min_ip_state, min_ip])
         | 
| 141 | 
            +
                min_ip.change(**min_ip_state_kwargs)
         | 
| 142 | 
            +
                qualified.change(**min_ip_state_kwargs)
         | 
| 143 | 
            +
                # pin_columns.input(
         | 
| 144 | 
            +
                  # lambda _pin_columns : gr.update(pinned_columns=None if _pin_columns else 3),
         | 
| 145 | 
            +
                  # inputs=pin_columns,
         | 
| 146 | 
            +
                  # outputs=leaderboard
         | 
| 147 | 
            +
                # )
         | 
| 148 | 
            +
                # pin_columns_state = gr.State(True)
         | 
| 149 | 
            +
                pin_columns.click(
         | 
| 150 | 
            +
                  lambda : gr.update(pinned_columns=None),
         | 
| 151 | 
            +
                  outputs=leaderboard
         | 
| 152 | 
            +
                )
         | 
| 153 | 
            +
                
         | 
| 154 | 
            +
              return app
         | 
| 155 | 
            +
             | 
| 156 | 
            +
            if __name__ == '__main__':
         | 
| 157 | 
            +
              app = create_pitcher_leaderboard()
         | 
| 158 | 
            +
              app.launch()
         | 
    	
        plotting.py
    CHANGED
    
    | @@ -92,7 +92,7 @@ def plot_loc(ax, locs): | |
| 92 |  | 
| 93 | 
             
            def plot_velo(ax, velos):
         | 
| 94 | 
             
              trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
         | 
| 95 | 
            -
              for (pitch,), _velos in velos.group_by('general_ballKind_code'):
         | 
| 96 | 
             
                _velos = _velos.filter(((pl.col('ballSpeed_mph') - pl.col('ballSpeed_mph').mean())/ pl.col('ballSpeed_mph').std()).abs() < 3)
         | 
| 97 |  | 
| 98 | 
             
                if len(_velos) <= 1:
         | 
|  | |
| 92 |  | 
| 93 | 
             
            def plot_velo(ax, velos):
         | 
| 94 | 
             
              trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
         | 
| 95 | 
            +
              for (pitch,), _velos in velos.sort(pl.len().over('general_ballKind_code'), descending=True).group_by('general_ballKind_code', maintain_order=True):
         | 
| 96 | 
             
                _velos = _velos.filter(((pl.col('ballSpeed_mph') - pl.col('ballSpeed_mph').mean())/ pl.col('ballSpeed_mph').std()).abs() < 3)
         | 
| 97 |  | 
| 98 | 
             
                if len(_velos) <= 1:
         | 
    	
        stats.py
    CHANGED
    
    | @@ -3,6 +3,12 @@ from data import data_df | |
| 3 |  | 
| 4 | 
             
            from types import SimpleNamespace
         | 
| 5 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 6 | 
             
            def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
         | 
| 7 | 
             
              if start_date is not None:
         | 
| 8 | 
             
                data = data.filter(pl.col('date') >= start_date)
         | 
| @@ -63,17 +69,19 @@ def compute_team_games(data): | |
| 63 | 
             
              )
         | 
| 64 |  | 
| 65 |  | 
| 66 | 
            -
            def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
         | 
| 67 | 
             
              assert player_type in ('pitcher', 'batter')
         | 
| 68 | 
             
              assert pitch_class_type in ('general', 'specific')
         | 
| 69 | 
            -
               | 
|  | |
|  | |
| 70 | 
             
              name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
         | 
| 71 | 
             
              pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
         | 
| 72 | 
             
              pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
         | 
| 73 | 
             
              pitch_stats = (
         | 
| 74 | 
             
                  data
         | 
| 75 | 
             
                  .with_columns((pl.col('ballSpeed') / 1.609).round(1).alias('mph'))
         | 
| 76 | 
            -
                  .group_by( | 
| 77 | 
             
                  .agg(
         | 
| 78 | 
             
                      pl.first(name_col),
         | 
| 79 | 
             
                      *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
         | 
| @@ -119,17 +127,23 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1): | |
| 119 | 
             
                      for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
         | 
| 120 | 
             
                  )
         | 
| 121 | 
             
                  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
         | 
| 122 | 
            -
                  .sort( | 
| 123 | 
             
              )
         | 
| 124 | 
             
              return pitch_stats
         | 
| 125 | 
            -
              
         | 
| 126 |  | 
| 127 | 
            -
            def compute_player_stats(data, player_type, min_ip='qualified'):
         | 
| 128 | 
             
              data = (
         | 
| 129 | 
             
                  compute_team_games(data)
         | 
| 130 | 
             
                  .with_columns(
         | 
| 131 | 
             
                      pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
         | 
| 132 | 
             
                      pl.col('inning_code').unique().len().over('pitId').alias('IP')
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 133 | 
             
                  )
         | 
| 134 | 
             
              )
         | 
| 135 |  | 
| @@ -139,17 +153,38 @@ def compute_player_stats(data, player_type, min_ip='qualified'): | |
| 139 | 
             
                data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
         | 
| 140 |  | 
| 141 | 
             
              assert player_type in ('pitcher', 'batter') 
         | 
| 142 | 
            -
               | 
|  | |
|  | |
| 143 | 
             
              name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
         | 
| 144 | 
             
              player_stats = (
         | 
| 145 | 
             
                data
         | 
| 146 | 
            -
                . | 
|  | |
| 147 | 
             
                .agg(
         | 
| 148 | 
             
                    pl.col(name_col).first(),
         | 
|  | |
|  | |
|  | |
|  | |
| 149 | 
             
                    (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
         | 
| 150 | 
             
                    (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
         | 
| 151 | 
            -
                    (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
         | 
| 152 | 
             
                    pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 153 | 
             
                    pl.first('qualified')
         | 
| 154 | 
             
                )
         | 
| 155 | 
             
                .explode('batType')
         | 
| @@ -163,9 +198,10 @@ def compute_player_stats(data, player_type, min_ip='qualified'): | |
| 163 | 
             
                )
         | 
| 164 | 
             
                .drop('G', 'F', 'B', 'P', 'L')
         | 
| 165 | 
             
                .with_columns(
         | 
| 166 | 
            -
                    (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending= | 
| 167 | 
            -
                    for stat in [' | 
| 168 | 
             
                )
         | 
|  | |
| 169 | 
             
              )
         | 
| 170 | 
             
              return player_stats
         | 
| 171 |  | 
| @@ -186,7 +222,7 @@ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=Non | |
| 186 | 
             
              if lr is not None:
         | 
| 187 | 
             
                source_data = source_data.filter(pl.col('batLR') == lr)
         | 
| 188 |  | 
| 189 | 
            -
              pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches).filter(pl.col('pitId') == id)
         | 
| 190 |  | 
| 191 | 
             
              pitch_shapes = (
         | 
| 192 | 
             
                  source_data
         | 
| @@ -200,6 +236,6 @@ def get_pitcher_stats(id, lr=None, game_kind=None, start_date=None, end_date=Non | |
| 200 | 
             
                  .with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
         | 
| 201 | 
             
              )
         | 
| 202 |  | 
| 203 | 
            -
              pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip).filter(pl.col('pitId') == id)
         | 
| 204 |  | 
| 205 | 
             
              return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
         | 
|  | |
| 3 |  | 
| 4 | 
             
            from types import SimpleNamespace
         | 
| 5 |  | 
| 6 | 
            +
            from convert import verify_and_return_presult
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            valid_pitch = pl.col('x').is_not_null() & pl.col('y').is_not_null() & (pl.col('ballSpeed') > 0)
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
             
            def filter_data_by_date_and_game_kind(data, start_date=None, end_date=None, game_kind=None):
         | 
| 13 | 
             
              if start_date is not None:
         | 
| 14 | 
             
                data = data.filter(pl.col('date') >= start_date)
         | 
|  | |
| 69 | 
             
              )
         | 
| 70 |  | 
| 71 |  | 
| 72 | 
            +
            def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1, group_by_team=False):
         | 
| 73 | 
             
              assert player_type in ('pitcher', 'batter')
         | 
| 74 | 
             
              assert pitch_class_type in ('general', 'specific')
         | 
| 75 | 
            +
              id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
         | 
| 76 | 
            +
              if group_by_team:
         | 
| 77 | 
            +
                id_cols.append('pitcher_team_name_short')
         | 
| 78 | 
             
              name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
         | 
| 79 | 
             
              pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
         | 
| 80 | 
             
              pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
         | 
| 81 | 
             
              pitch_stats = (
         | 
| 82 | 
             
                  data
         | 
| 83 | 
             
                  .with_columns((pl.col('ballSpeed') / 1.609).round(1).alias('mph'))
         | 
| 84 | 
            +
                  .group_by(*id_cols, pitch_col)
         | 
| 85 | 
             
                  .agg(
         | 
| 86 | 
             
                      pl.first(name_col),
         | 
| 87 | 
             
                      *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
         | 
|  | |
| 127 | 
             
                      for stat in ['Avg KPH', 'Max KPH', 'Avg MPH', 'Max MPH', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
         | 
| 128 | 
             
                  )
         | 
| 129 | 
             
                  .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
         | 
| 130 | 
            +
                  .sort(id_cols[0], 'count', descending=[False, True])
         | 
| 131 | 
             
              )
         | 
| 132 | 
             
              return pitch_stats
         | 
|  | |
| 133 |  | 
| 134 | 
            +
            def compute_player_stats(data, player_type, min_ip='qualified', group_by_team=False):
         | 
| 135 | 
             
              data = (
         | 
| 136 | 
             
                  compute_team_games(data)
         | 
| 137 | 
             
                  .with_columns(
         | 
| 138 | 
             
                      pl.when(pl.col('half_inning').str.ends_with('1')).then('home_games').otherwise('visitor_games').first().over('pitId').alias('games'),
         | 
| 139 | 
             
                      pl.col('inning_code').unique().len().over('pitId').alias('IP')
         | 
| 140 | 
            +
                      # pl.col('presult').is_in(verify_and_return_presult([
         | 
| 141 | 
            +
                        # 'Groundout', 'Flyout', 'Lineout', 'Groundout (Double play)',
         | 
| 142 | 
            +
                        # 'Foul fly', 'Foul line (?)',
         | 
| 143 | 
            +
                        # 'Sacrifice bunt', 'Sacrifice fly',
         | 
| 144 | 
            +
                        # "Fielder's choice", "Sacrifice fielder's choice",
         | 
| 145 | 
            +
                        # 'Bunt strikeout', 'Swinging strikeout', 'Looking strikeout'
         | 
| 146 | 
            +
                      # ])).sum().over('pitId').mul(1/3).alias('IP')
         | 
| 147 | 
             
                  )
         | 
| 148 | 
             
              )
         | 
| 149 |  | 
|  | |
| 153 | 
             
                data = data.with_columns((pl.col('IP') >= min_ip).alias('qualified'))
         | 
| 154 |  | 
| 155 | 
             
              assert player_type in ('pitcher', 'batter') 
         | 
| 156 | 
            +
              id_cols = ['pitId' if player_type == 'pitcher' else 'batId']
         | 
| 157 | 
            +
              if group_by_team:
         | 
| 158 | 
            +
                id_cols.append('pitcher_team_name_short')
         | 
| 159 | 
             
              name_col = 'pitcher_name' if player_type == 'pitcher' else 'batter_name'
         | 
| 160 | 
             
              player_stats = (
         | 
| 161 | 
             
                data
         | 
| 162 | 
            +
                .with_columns(pl.when(pl.col('general_ballKind_code').is_in(['4S', 'FC', 'SI'])).then(pl.when(valid_pitch).then('ballSpeed').mean().over('pitId', 'general_ballKind_code')).mul(1/1.609).round(1).alias('FB Velo'))
         | 
| 163 | 
            +
                .group_by(id_cols)
         | 
| 164 | 
             
                .agg(
         | 
| 165 | 
             
                    pl.col(name_col).first(),
         | 
| 166 | 
            +
                    *([] if group_by_team else [pl.col('pitcher_team_name_short').last()]),
         | 
| 167 | 
            +
                    pl.col('IP').first(),
         | 
| 168 | 
            +
                    pl.col('pa_code').unique().len().alias('PA'),
         | 
| 169 | 
            +
                    pl.col('FB Velo').max(),
         | 
| 170 | 
             
                    (pl.when(pl.col('presult').str.contains('strikeout')).then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('K%'),
         | 
| 171 | 
             
                    (pl.when(pl.col('presult') == 'Walk').then(1).otherwise(0).sum() / pl.col('pa_code').unique().len()).alias('BB%'),
         | 
|  | |
| 172 | 
             
                    pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
         | 
| 173 | 
            +
                    (pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
         | 
| 174 | 
            +
                    ((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
         | 
| 175 | 
            +
                    ((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
         | 
| 176 | 
            +
                    ((pl.col('swing') & ~pl.col('whiff')).sum()/pl.col('swing').sum()).alias('Contact%'),
         | 
| 177 | 
            +
                    ((pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(pl.col('zone') & pl.col('swing')).sum()).alias('Z-Contact%'),
         | 
| 178 | 
            +
                    ((~pl.col('zone') & pl.col('swing') & ~pl.col('whiff')).sum()/(~pl.col('zone') & pl.col('swing')).sum()).alias('O-Contact%'),
         | 
| 179 | 
            +
                    (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
         | 
| 180 | 
            +
                    (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
         | 
| 181 | 
            +
                    (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
         | 
| 182 | 
            +
                    (pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
         | 
| 183 | 
            +
                    (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
         | 
| 184 | 
            +
                    (pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
         | 
| 185 | 
            +
                    (pl.col('y') > 125).mean().alias('High%'),
         | 
| 186 | 
            +
                    (pl.col('y') <= 125).mean().alias('Low%'),
         | 
| 187 | 
            +
                    (pl.col('x').is_between(-20, 20) & pl.col('y').is_between(100, 100+50)).mean().alias('MM%'),
         | 
| 188 | 
             
                    pl.first('qualified')
         | 
| 189 | 
             
                )
         | 
| 190 | 
             
                .explode('batType')
         | 
|  | |
| 198 | 
             
                )
         | 
| 199 | 
             
                .drop('G', 'F', 'B', 'P', 'L')
         | 
| 200 | 
             
                .with_columns(
         | 
| 201 | 
            +
                    (pl.when(pl.col('qualified')).then(pl.col(stat)).rank(descending=stat in ['BB%', 'FB%', 'LD%'] or 'Contact%' in stat)/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
         | 
| 202 | 
            +
                    for stat in ['FB Velo', 'K%', 'BB%', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
         | 
| 203 | 
             
                )
         | 
| 204 | 
            +
                .sort('IP', descending=True)
         | 
| 205 | 
             
              )
         | 
| 206 | 
             
              return player_stats
         | 
| 207 |  | 
|  | |
| 222 | 
             
              if lr is not None:
         | 
| 223 | 
             
                source_data = source_data.filter(pl.col('batLR') == lr)
         | 
| 224 |  | 
| 225 | 
            +
              pitch_stats = compute_pitch_stats(source_data, player_type='pitcher', pitch_class_type=pitch_class_type, min_pitches=min_pitches, group_by_team=False).filter(pl.col('pitId') == id)
         | 
| 226 |  | 
| 227 | 
             
              pitch_shapes = (
         | 
| 228 | 
             
                  source_data
         | 
|  | |
| 236 | 
             
                  .with_columns((pl.col('ballSpeed')/1.609).alias('ballSpeed_mph'))
         | 
| 237 | 
             
              )
         | 
| 238 |  | 
| 239 | 
            +
              pitcher_stats = compute_player_stats(source_data, player_type='pitcher', min_ip=min_ip, group_by_team=False).filter(pl.col('pitId') == id)
         | 
| 240 |  | 
| 241 | 
             
              return SimpleNamespace(pitcher_stats=pitcher_stats, pitch_stats=pitch_stats, pitch_shapes=pitch_shapes)
         | 

