diff --git a/docs/wbc_statcast_search.md b/docs/wbc_statcast_search.md new file mode 100644 index 0000000..cabda56 --- /dev/null +++ b/docs/wbc_statcast_search.md @@ -0,0 +1,93 @@ +# WBC Statcast Search + +## `wbc_statcast_search` + +Function to search for WBC Statcast pitch-level data with custom filters based on Baseball Savant's [WBC Statcast Search](https://baseballsavant.mlb.com/statcast-search-world-baseball-classic). + +**Note:** If the search range is too wide, the response can take a very long time. + +**WBC data availability** + +> From Baseball Savant: +> World Baseball Classic pitch-level Statcast data is available beginning with the 2023 tournament. Bat tracking data will additionally be available beginning with the 2026 tournament. + +**Examples** + +```python +from baseball_stats_python import wbc_statcast_search + +# Get all pitch data in 2023 WBC +wbc_statcast_search( + season="2023" +) + +# Get all pitch data in 2026 Pool Play +wbc_statcast_search( + game_type="F" +) +``` + +**Arguments** + +| Argument | Data Type | Description | Default | +| --------------- | -------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | +| season | `str` or `list[str]` | The season(s) to search for. | Current season | +| player_type | `str` | Player type for search result. Currently only supports `pitcher` and `batter`. | "pitcher" | +| game_type | `str` or `WbcGameType` or `list[str or WbcGameType]` | Game type (`F`, `CL`, `CD`, `CW`). Also supports `all` to select all options. See the [WbcGameType](../enums/wbc.py) enum. | `""` (all game types) | +| pitchers_lookup | `str` or `list[str]` | Pitcher(s)'s mlbam_id. Can get MLBAM ID from Savant's WBC gameday | "" | +| batters_lookup | `str` or `list[str]` | Batter(s)'s mlbam_id. 
Can get MLBAM ID from Savant's WBC gameday | "" | +| debug | `bool` | Whether to print debug information | False | + +**Use Enums** + +```python +from baseball_stats_python.enums.wbc import WbcGameType + +# Get Semi-Finals data +wbc_statcast_search( + game_type=WbcGameType.SEMI_FINALS +) + +``` + +**Return** + +A DataFrame whose columns are described in Baseball Savant's [CSV Docs](https://baseballsavant.mlb.com/csv-docs). + +## `wbc_statcast_pitcher_search` + +Based on `wbc_statcast_search`, but only returns pitcher data. + +**Examples** + +```python +from baseball_stats_python import wbc_statcast_pitcher_search + +# Get all pitch data of a specific pitcher +wbc_statcast_pitcher_search( + pitchers_lookup="830717" +) +``` + +**Arguments** + +Same as `wbc_statcast_search`, but `pitchers_lookup` is the only player filter available. If `pitchers_lookup` is not provided, a `ValueError` is raised. + +## `wbc_statcast_batter_search` + +Based on `wbc_statcast_search`, but only returns pitches that the target batter(s) faced. + +**Examples** + +```python +from baseball_stats_python import wbc_statcast_batter_search + +# Get all pitch data of a specific batter +wbc_statcast_batter_search( + batters_lookup="838360" +) +``` + +**Arguments** + +Same as `wbc_statcast_search`, but `batters_lookup` is the only player filter available. If `batters_lookup` is not provided, a `ValueError` is raised. 
diff --git a/example.py b/example.py index cbf6bd4..72f66c9 100644 --- a/example.py +++ b/example.py @@ -4,6 +4,7 @@ minor_statcast_search, mlbam_id_search, statcast_search, + wbc_statcast_search, ) from src.baseball_stats_python.enums.minor import MinorGameType from src.baseball_stats_python.enums.statcast import GameType, MlbTeam, Month @@ -11,9 +12,9 @@ def example(): df = statcast_search( - season='2023', - pitchers_lookup='477132', - game_type=[GameType.PLAYOFFS, 'R'], + season="2023", + pitchers_lookup="477132", + game_type=[GameType.PLAYOFFS, "R"], opponent=MlbTeam.PADRES, month=Month.JUNE, ) @@ -22,22 +23,29 @@ def example(): def minor_example(): df = minor_statcast_search( - season='2023', game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup='678906' + season="2023", game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup="678906" ) print(df) def mlbam_id_example(): - df = mlbam_id_search('Reynolds') + df = mlbam_id_search("Lin") print(df) def spring_training_example(): df = statcast_search( - season='2025', - start_dt='2025-02-20', - end_dt='2025-02-20', - game_type='S', + season="2025", + start_dt="2025-02-20", + end_dt="2025-02-20", + game_type="S", + ) + print(df) + + +def wbc_example(): + df = wbc_statcast_search( + batters_lookup="838360", ) print(df) @@ -46,3 +54,4 @@ def spring_training_example(): # minor_example() # mlbam_id_example() # spring_training_example() +# wbc_example() diff --git a/src/baseball_stats_python/__init__.py b/src/baseball_stats_python/__init__.py index 5257fa0..33dcdf0 100644 --- a/src/baseball_stats_python/__init__.py +++ b/src/baseball_stats_python/__init__.py @@ -12,16 +12,24 @@ statcast_pitcher_search, statcast_search, ) +from .statcast.wbc_statcast_search import ( + wbc_statcast_batter_search, + wbc_statcast_pitcher_search, + wbc_statcast_search, +) __all__ = [ - 'statcast_search', - 'statcast_pitcher_search', - 'statcast_batter_search', - 'minor_statcast_search', - 'minor_statcast_pitcher_search', - 
'minor_statcast_batter_search', - 'mlbam_id_search', - 'catcher_throwing', - 'runner_basestealing', - 'runner_extra_bases_taken', + "statcast_search", + "statcast_pitcher_search", + "statcast_batter_search", + "minor_statcast_search", + "minor_statcast_pitcher_search", + "minor_statcast_batter_search", + "mlbam_id_search", + "catcher_throwing", + "runner_basestealing", + "runner_extra_bases_taken", + "wbc_statcast_search", + "wbc_statcast_pitcher_search", + "wbc_statcast_batter_search", ] diff --git a/src/baseball_stats_python/constants/__init__.py b/src/baseball_stats_python/constants/__init__.py index 3d2d398..0426312 100644 --- a/src/baseball_stats_python/constants/__init__.py +++ b/src/baseball_stats_python/constants/__init__.py @@ -1 +1 @@ -DEFAULT_SEASON = 2024 +DEFAULT_SEASON = 2026 diff --git a/src/baseball_stats_python/enums/wbc.py b/src/baseball_stats_python/enums/wbc.py new file mode 100644 index 0000000..78a95b3 --- /dev/null +++ b/src/baseball_stats_python/enums/wbc.py @@ -0,0 +1,18 @@ +from .enum_base import EnumBase + + +class WbcGameType(EnumBase): + """ + Enum for WBC Game Types. + Currently WBC Statcast Search only supports Pool Play, Semi-Finals, Quarter-Finals, and Championship. 
+ + POOL_PLAY = "F", + SEMI_FINALS = "CL", + QUARTER_FINALS = "CD", + CHAMPIONSHIP = "CW" + """ + + POOL_PLAY = "F" + SEMI_FINALS = "CL" + QUARTER_FINALS = "CD" + CHAMPIONSHIP = "CW" diff --git a/src/baseball_stats_python/statcast/statcast_search.py b/src/baseball_stats_python/statcast/statcast_search.py index da59c74..6dd44a8 100644 --- a/src/baseball_stats_python/statcast/statcast_search.py +++ b/src/baseball_stats_python/statcast/statcast_search.py @@ -14,24 +14,24 @@ from ..utils.utils import validate_date_range logging.basicConfig() -logger = logging.getLogger('Statcast') +logger = logging.getLogger("Statcast") session = requests.Session() -STATCAST_SEARCH_URL = 'https://baseballsavant.mlb.com/statcast_search/csv' +STATCAST_SEARCH_URL = "https://baseballsavant.mlb.com/statcast_search/csv" def statcast_search( - season: str | list[str] = '2024', - player_type: str = 'pitcher', + season: str | list[str] = "2024", + player_type: str = "pitcher", game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON, - start_dt: str = '', - end_dt: str = '', - month: str | Month | list[str | Month] = '', - pitchers_lookup: str | list[str] = '', - batters_lookup: str | list[str] = '', - team: str | MlbTeam | list[str | MlbTeam] = '', - opponent: str | MlbTeam | list[str | MlbTeam] = '', + start_dt: str = "", + end_dt: str = "", + month: str | Month | list[str | Month] = "", + pitchers_lookup: str | list[str] = "", + batters_lookup: str | list[str] = "", + team: str | MlbTeam | list[str | MlbTeam] = "", + opponent: str | MlbTeam | list[str | MlbTeam] = "", debug: bool = False, ) -> pd.DataFrame: """ @@ -60,49 +60,50 @@ def statcast_search( validate_date_range(start_dt, end_dt) params = { - 'all': 'true', - 'player_type': player_type, - 'hfSea': get_season_param_str(season), - 'hfGT': get_game_type_param_str(game_type), - 'game_date_gt': start_dt, - 'game_date_lt': end_dt, - 'hfMo': get_month_param_str(month), - 'hfTeam': get_team_param_str(team), - 'hfOpponent': 
get_team_param_str(opponent), - 'type': 'details', + "all": "true", + "player_type": player_type, + "hfSea": get_season_param_str(season), + "hfGT": get_game_type_param_str(game_type), + "game_date_gt": start_dt, + "game_date_lt": end_dt, + "hfMo": get_month_param_str(month), + "hfTeam": get_team_param_str(team), + "hfOpponent": get_team_param_str(opponent), + "type": "details", } if pitchers_lookup: - params['pitchers_lookup[]'] = pitchers_lookup + params["pitchers_lookup[]"] = pitchers_lookup if batters_lookup: - params['batters_lookup[]'] = batters_lookup + params["batters_lookup[]"] = batters_lookup - print('Starting Statcast Search') - logger.debug(f'Params: {params}') + print("Starting Statcast Search") + logger.debug(f"Params: {params}") response = session.get(STATCAST_SEARCH_URL, params=params) logger.debug(response.url) + print(response.url) if response.status_code == 200: - print('Statcast Search Completed') + print("Statcast Search Completed") csv_content = io.StringIO(response.text) return pd.read_csv(csv_content) else: raise Exception( - f'Failed to fetch data: {response.status_code} - {response.text}' + f"Failed to fetch data: {response.status_code} - {response.text}" ) def statcast_pitcher_search( pitchers_lookup: str | list[str], - season: str | list[str] = '2024', + season: str | list[str] = "2024", game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON, - start_dt: str = '', - end_dt: str = '', - month: str | Month | list[str | Month] = '', - opponent: str | MlbTeam | list[str | MlbTeam] = '', + start_dt: str = "", + end_dt: str = "", + month: str | Month | list[str | Month] = "", + opponent: str | MlbTeam | list[str | MlbTeam] = "", debug: bool = False, ) -> pd.DataFrame: """ @@ -122,18 +123,18 @@ def statcast_pitcher_search( """ if not pitchers_lookup: - raise ValueError('pitchers_lookup is required') + raise ValueError("pitchers_lookup is required") params = { - 'pitchers_lookup': pitchers_lookup, - 'season': season, - 
'player_type': 'pitcher', - 'game_type': game_type, - 'start_dt': start_dt, - 'end_dt': end_dt, - 'month': month, - 'opponent': opponent, - 'debug': debug, + "pitchers_lookup": pitchers_lookup, + "season": season, + "player_type": "pitcher", + "game_type": game_type, + "start_dt": start_dt, + "end_dt": end_dt, + "month": month, + "opponent": opponent, + "debug": debug, } return statcast_search(**params) @@ -141,12 +142,12 @@ def statcast_pitcher_search( def statcast_batter_search( batters_lookup: str | list[str], - season: str | list[str] = '2024', + season: str | list[str] = "2024", game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON, - start_dt: str = '', - end_dt: str = '', - month: str | Month | list[str | Month] = '', - opponent: str | MlbTeam | list[str | MlbTeam] = '', + start_dt: str = "", + end_dt: str = "", + month: str | Month | list[str | Month] = "", + opponent: str | MlbTeam | list[str | MlbTeam] = "", debug: bool = False, ) -> pd.DataFrame: """ @@ -166,18 +167,18 @@ def statcast_batter_search( """ if not batters_lookup: - raise ValueError('batters_lookup is required') + raise ValueError("batters_lookup is required") params = { - 'batters_lookup': batters_lookup, - 'season': season, - 'player_type': 'batter', - 'game_type': game_type, - 'start_dt': start_dt, - 'end_dt': end_dt, - 'month': month, - 'opponent': opponent, - 'debug': debug, + "batters_lookup": batters_lookup, + "season": season, + "player_type": "batter", + "game_type": game_type, + "start_dt": start_dt, + "end_dt": end_dt, + "month": month, + "opponent": opponent, + "debug": debug, } return statcast_search(**params) diff --git a/src/baseball_stats_python/statcast/wbc_statcast_search.py b/src/baseball_stats_python/statcast/wbc_statcast_search.py new file mode 100644 index 0000000..7f33743 --- /dev/null +++ b/src/baseball_stats_python/statcast/wbc_statcast_search.py @@ -0,0 +1,135 @@ +import io +import logging + +import pandas as pd +import requests + +from 
..enums.wbc import WbcGameType +from ..utils.wbc import get_wbc_game_type_param_str, get_wbc_season_param_str + +logging.basicConfig() +logger = logging.getLogger() + +session = requests.Session() + + +WBC_STATCAST_SEARCH_URL = ( + "https://baseballsavant.mlb.com/statcast-search-world-baseball-classic/csv" +) + + +def wbc_statcast_search( + season: str | list[str] = "2026", + player_type: str = "pitcher", + game_type: str | WbcGameType | list[str | WbcGameType] = "", + pitchers_lookup: str | list[str] = "", + batters_lookup: str | list[str] = "", + debug: bool = False, +) -> pd.DataFrame: + """ + Search for WBC Statcast pitch-level data with custom filters. + + Args: + season (str | list[str]): The season(s) to search for. + player_type (str): The type of player to search for. + pitchers_lookup (str | list[str]): The pitcher(s) to search for. + batters_lookup (str | list[str]): The batter(s) to search for. + + Returns: + pd.DataFrame: A DataFrame containing the WBC Statcast pitch-level data. 
+ """ + + if debug: + logger.setLevel(logging.DEBUG) + + params = { + "all": "true", + "player_type": player_type, + "hfSea": get_wbc_season_param_str(season), + "hfGT": get_wbc_game_type_param_str(game_type), + "type": "details", + "wbc": "true", + } + + if pitchers_lookup: + params["pitchers_lookup[]"] = pitchers_lookup + + if batters_lookup: + params["batters_lookup[]"] = batters_lookup + + logger.debug(f"Params: {params}") + + print("Starting WBC Statcast Search") + response = session.get(WBC_STATCAST_SEARCH_URL, params=params) + + logger.debug(response.url) + + if response.status_code == 200: + print("WBC Statcast Search Completed") + csv_content = io.StringIO(response.text) + + return pd.read_csv(csv_content) + else: + raise Exception( + f"Failed to fetch data: {response.status_code} - {response.text}" + ) + + +def wbc_statcast_pitcher_search( + pitchers_lookup: str | list[str], + season: str | list[str] = "2026", + game_type: str | WbcGameType | list[str | WbcGameType] = "", + debug: bool = False, +) -> pd.DataFrame: + """ + Search for WBC Statcast pitch-level data for pitcher(s) with custom filters. + Args: + pitchers_lookup (str | list[str]): The pitcher(s) to search for. (Required) + season (str | list[str]): The season(s) to search for. + game_type (str | WbcGameType | list[str | WbcGameType]): The game type(s) to search for. + Returns: + pd.DataFrame: A DataFrame containing the WBC Statcast pitch-level data for target pitcher(s). 
+ """ + + if not pitchers_lookup: + raise ValueError("pitchers_lookup is required") + + params = { + "pitchers_lookup": pitchers_lookup, + "season": season, + "player_type": "pitcher", + "game_type": game_type, + "debug": debug, + } + + return wbc_statcast_search(**params) + + +def wbc_statcast_batter_search( + batters_lookup: str | list[str], + season: str | list[str] = "2026", + game_type: str | WbcGameType | list[str | WbcGameType] = "", + debug: bool = False, +) -> pd.DataFrame: + """ + Search for WBC Statcast pitch-level data for batter(s) with custom filters. + Args: + batters_lookup (str | list[str]): The batter(s) to search for. (Required) + season (str | list[str]): The season(s) to search for. + game_type (str | WbcGameType | list[str | WbcGameType]): The game type(s) to search for. + Returns: + pd.DataFrame: A DataFrame containing the WBC Statcast pitch-level data for target batter(s). + """ + + if not batters_lookup: + raise ValueError("batters_lookup is required") + + params = { + "batters_lookup": batters_lookup, + "season": season, + "player_type": "batter", + "game_type": game_type, + "debug": debug, + } + + return wbc_statcast_search(**params) diff --git a/src/baseball_stats_python/utils/wbc.py b/src/baseball_stats_python/utils/wbc.py new file mode 100644 index 0000000..a63ba66 --- /dev/null +++ b/src/baseball_stats_python/utils/wbc.py @@ -0,0 +1,53 @@ +from ..constants import DEFAULT_SEASON +from ..enums.wbc import WbcGameType + +# TODO: Remove 2025 once Baseball Savant changes their default season +ALL_SEASONS = ["2023", "2026"] + + +def get_wbc_season_param_str(season: str | list[str]) -> str: + if not isinstance(season, str) and not isinstance(season, list): + raise ValueError(f"Invalid type for season: {type(season)}") + + if isinstance(season, list): + if any(season not in ALL_SEASONS for season in season): + raise ValueError(f"Invalid seasons: {season}") + return "|".join(season) + + if season == "": + return str(DEFAULT_SEASON) + if season 
== "all": + return "|".join(ALL_SEASONS) + + if season not in ALL_SEASONS: + raise ValueError(f"Invalid season: {season}") + + return season + + +def get_wbc_game_type_param_str( + game_type: str | WbcGameType | list[str | WbcGameType], +) -> str: + if ( + not isinstance(game_type, str) + and not isinstance(game_type, list) + and not isinstance(game_type, WbcGameType) + ): + raise ValueError(f"Invalid type for game_type: {type(game_type)}") + + if isinstance(game_type, list): + str_game_type = [str(game_type) for game_type in game_type] + if any(not WbcGameType.has_value(game_type) for game_type in str_game_type): + raise ValueError(f"Invalid game types: {'|'.join(str_game_type)}") + return f"{'|'.join(str_game_type)}|" + + if game_type == "": + return "F|CL|CD|CW|" + + if game_type == "all": + return f"{WbcGameType.join_all()}|" + + if not WbcGameType.has_value(game_type): + raise ValueError(f"Invalid game type: {game_type}") + + return f"{game_type}|" diff --git a/tests/statcast/test_wbc_statcast_search.py b/tests/statcast/test_wbc_statcast_search.py new file mode 100644 index 0000000..d7bb88c --- /dev/null +++ b/tests/statcast/test_wbc_statcast_search.py @@ -0,0 +1,18 @@ +import pytest + +from baseball_stats_python.statcast.wbc_statcast_search import ( + wbc_statcast_batter_search, + wbc_statcast_pitcher_search, +) + + +def test_wbc_statcast_pitcher_search_invalid(): + with pytest.raises(ValueError) as e: + wbc_statcast_pitcher_search(pitchers_lookup="") + assert str(e.value) == "pitchers_lookup is required" + + +def test_wbc_statcast_batter_search_invalid(): + with pytest.raises(ValueError) as e: + wbc_statcast_batter_search(batters_lookup="") + assert str(e.value) == "batters_lookup is required"