Spaces:
Running
Running
Commit
·
f89cae0
1
Parent(s):
65fefb5
Add location stats
Browse files
- data.py +1 -0
- pitch_leaderboard.py +4 -4
- stats.py +5 -43
data.py
CHANGED
|
@@ -221,6 +221,7 @@ data_df = (
|
|
| 221 |
pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
|
| 222 |
pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
|
| 223 |
pl.col('batLR').replace_strict(lr),
|
|
|
|
| 224 |
pl.col('date').str.to_date('%Y%m%d'),
|
| 225 |
|
| 226 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
|
|
|
| 221 |
pl.col('ballKind').replace_strict(general_ball_kind).alias('general_ballKind'),
|
| 222 |
pl.col('ballKind').replace_strict(general_ball_kind_code).alias('general_ballKind_code'),
|
| 223 |
pl.col('batLR').replace_strict(lr),
|
| 224 |
+
pl.col('pitLR').replace_strict(lr),
|
| 225 |
pl.col('date').str.to_date('%Y%m%d'),
|
| 226 |
|
| 227 |
pl.when(pl.col('GameKindName').str.contains('Regular Season') | (pl.col('GameKindName') == 'Interleague'))
|
pitch_leaderboard.py
CHANGED
|
@@ -8,9 +8,9 @@ from data import data_df
|
|
| 8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 9 |
from convert import ball_kind
|
| 10 |
|
| 11 |
-
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 12 |
-
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 13 |
-
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 14 |
|
| 15 |
todo = '''
|
| 16 |
**To-do**
|
|
@@ -56,7 +56,7 @@ def gr_create_pitch_leaderboard(start_date, end_date, min_pitches, pitcher_lr, i
|
|
| 56 |
pl.col(stat).mul(100).round(1)
|
| 57 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
| 58 |
)
|
| 59 |
-
[['Pitcher', 'Pitch', 'Pitch (General)'
|
| 60 |
)
|
| 61 |
return pitch_stats
|
| 62 |
|
|
|
|
| 8 |
from stats import compute_pitch_stats, filter_data_by_date_and_game_kind
|
| 9 |
from convert import ball_kind
|
| 10 |
|
| 11 |
+
STATS = ['Count', 'Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
| 12 |
+
PCT_STATS = ['Usage', 'Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%', 'Arm%', 'Glove%', 'High%', 'Low%']
|
| 13 |
+
STATS_WITH_PCTLS = ['Swing%', 'Z-Swing%', 'Chase%', 'Contact%', 'Z-Contact%', 'O-Contact%', 'SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%', 'Zone%']
|
| 14 |
|
| 15 |
todo = '''
|
| 16 |
**To-do**
|
|
|
|
| 56 |
pl.col(stat).mul(100).round(1)
|
| 57 |
for stat in PCT_STATS + [f'{stat} (Pctl)' for stat in STATS_WITH_PCTLS]
|
| 58 |
)
|
| 59 |
+
[['Pitcher', 'Pitch', 'Pitch (General)'] + STATS]
|
| 60 |
)
|
| 61 |
return pitch_stats
|
| 62 |
|
stats.py
CHANGED
|
@@ -52,48 +52,6 @@ def compute_team_games(data):
|
|
| 52 |
)
|
| 53 |
|
| 54 |
|
| 55 |
-
# def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 56 |
-
# assert player_type in ('pitcher', 'batter')
|
| 57 |
-
# assert pitch_class_type in ('general', 'specific')
|
| 58 |
-
# id_col = 'pitId' if player_type == 'pitcher' else 'batId'
|
| 59 |
-
# pitch_col = 'ballKind_code' if pitch_class_type == 'specific' else 'general_ballKind_code'
|
| 60 |
-
# pitch_name_col = 'ballKind' if pitch_class_type == 'specific' else 'general_ballKind'
|
| 61 |
-
# pitch_stats = (
|
| 62 |
-
# data
|
| 63 |
-
# .group_by(id_col, pitch_col)
|
| 64 |
-
# .agg(
|
| 65 |
-
# pl.first('pitcher_name'),
|
| 66 |
-
# *([pl.first('general_ballKind')] if pitch_class_type == 'specific' else []),
|
| 67 |
-
# pl.first(pitch_name_col),
|
| 68 |
-
# pl.len().alias('count'),
|
| 69 |
-
# pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 70 |
-
# (pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 71 |
-
# (pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 72 |
-
# (pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%')
|
| 73 |
-
# )
|
| 74 |
-
# .with_columns(
|
| 75 |
-
# (pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
| 76 |
-
# (pl.col('count') >= min_pitches).alias('qualified')
|
| 77 |
-
# )
|
| 78 |
-
# .explode('batType')
|
| 79 |
-
# .unnest('batType')
|
| 80 |
-
# .pivot(on='batType', values='proportion')
|
| 81 |
-
# .fill_null(0)
|
| 82 |
-
# .with_columns(
|
| 83 |
-
# (pl.col('G') + pl.col('B')).alias('GB%'),
|
| 84 |
-
# (pl.col('F') + pl.col('P')).alias('FB%'),
|
| 85 |
-
# pl.col('L').alias('LD%').round(2),
|
| 86 |
-
# )
|
| 87 |
-
# .drop('G', 'F', 'B', 'P', 'L', 'null')
|
| 88 |
-
# .with_columns(
|
| 89 |
-
# (pl.when(pl.col('qualified')).then(pl.col(stat)).rank()/pl.when(pl.col('qualified')).then(pl.col(stat)).count()).alias(f'{stat}_pctl')
|
| 90 |
-
# for stat in ['SwStr%', 'Whiff%', 'CSW%', 'GB%', 'FB%', 'LD%']
|
| 91 |
-
# )
|
| 92 |
-
# .rename({pitch_col: 'ballKind_code', pitch_name_col: 'ballKind'} if pitch_class_type == 'general' else {})
|
| 93 |
-
# .sort(id_col, 'count', descending=[False, True])
|
| 94 |
-
# )
|
| 95 |
-
# return pitch_stats
|
| 96 |
-
|
| 97 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 98 |
assert player_type in ('pitcher', 'batter')
|
| 99 |
assert pitch_class_type in ('general', 'specific')
|
|
@@ -109,7 +67,6 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 109 |
pl.first(pitch_name_col),
|
| 110 |
pl.len().alias('count'),
|
| 111 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
| 112 |
-
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
| 113 |
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
| 114 |
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
| 115 |
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
|
@@ -119,6 +76,11 @@ def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
|
| 119 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 120 |
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 121 |
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
)
|
| 123 |
.with_columns(
|
| 124 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def compute_pitch_stats(data, player_type, pitch_class_type, min_pitches=1):
|
| 56 |
assert player_type in ('pitcher', 'batter')
|
| 57 |
assert pitch_class_type in ('general', 'specific')
|
|
|
|
| 67 |
pl.first(pitch_name_col),
|
| 68 |
pl.len().alias('count'),
|
| 69 |
pl.col('aux_bresult').struct.field('batType').drop_nulls().value_counts(normalize=True),
|
|
|
|
| 70 |
(pl.col('swing').sum() / pl.col('pitch').sum()).alias('Swing%'),
|
| 71 |
((pl.col('swing') & pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Z-Swing%'),
|
| 72 |
((pl.col('swing') & ~pl.col('zone')).sum() / pl.col('pitch').sum()).alias('Chase%'),
|
|
|
|
| 76 |
(pl.col('whiff').sum() / pl.col('swing').sum()).alias('Whiff%'),
|
| 77 |
(pl.col('whiff').sum() / pl.col('pitch').sum()).alias('SwStr%'),
|
| 78 |
(pl.col('csw').sum() / pl.col('pitch').sum()).alias('CSW%'),
|
| 79 |
+
(pl.col('zone').sum() / pl.col('pitch').sum()).alias('Zone%'),
|
| 80 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') < 0).otherwise(pl.col('x') > 0)).mean().alias('Glove%'),
|
| 81 |
+
(pl.when(pl.col('pitLR') == 'r').then(pl.col('x') >= 0).otherwise(pl.col('x') <= 0)).mean().alias('Arm%'),
|
| 82 |
+
(pl.col('y') > 125).mean().alias('High%'),
|
| 83 |
+
(pl.col('y') <= 125).mean().alias('Low%')
|
| 84 |
)
|
| 85 |
.with_columns(
|
| 86 |
(pl.col('count')/pl.sum('count').over('pitId')).alias('usage'),
|