diff --git a/project/Final_Report.ipynb b/project/Final_Report.ipynb index b83f6a0..3443508 100644 --- a/project/Final_Report.ipynb +++ b/project/Final_Report.ipynb @@ -51,12 +51,30 @@ }, { "cell_type": "code", + "execution_count": 56, "metadata": { "ExecuteTime": { "end_time": "2024-12-10T18:38:06.152974Z", "start_time": "2024-12-10T18:38:06.139745Z" } }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python version: 3.10.13 (main, Sep 11 2023, 08:24:56) [Clang 14.0.6 ]\n", + "Pandas version: 2.2.2\n", + "Numpy version: 1.23.5\n", + "Matplotlib version: 3.8.4\n", + "Seaborn version: 0.13.2\n", + "FastF1 version: 3.4.4\n", + "Scikit-learn version: 1.5.1\n", + "XGBoost version: 2.1.1\n", + "LightGBM version: 4.5.0\n" + ] + } + ], "source": [ "# Importing Libraries\n", "import sys\n", @@ -97,25 +115,7 @@ "print(f'Scikit-learn version: {sys.modules[\"sklearn\"].__version__}')\n", "print(f'XGBoost version: {xgb.__version__}')\n", "print(f'LightGBM version: {sys.modules[\"lightgbm\"].__version__}')" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python version: 3.10.13 (main, Sep 11 2023, 08:24:56) [Clang 14.0.6 ]\n", - "Pandas version: 2.2.2\n", - "Numpy version: 1.23.5\n", - "Matplotlib version: 3.8.4\n", - "Seaborn version: 0.13.2\n", - "FastF1 version: 3.4.4\n", - "Scikit-learn version: 1.5.1\n", - "XGBoost version: 2.1.1\n", - "LightGBM version: 4.5.0\n" - ] - } - ], - "execution_count": 56 + ] }, { "cell_type": "markdown", @@ -132,12 +132,14 @@ }, { "cell_type": "code", + "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2024-12-10T17:42:22.557629Z", "start_time": "2024-12-10T17:42:22.532022Z" } }, + "outputs": [], "source": [ "# Set up FastF1 plotting and caching\n", "cache_dir = '../data/cache'\n", @@ -146,26 +148,24 @@ "\n", "fastf1.Cache.enable_cache(cache_dir)\n", "fastf1.plotting.setup_mpl(misc_mpl_mods=False, color_scheme=None)" - ], - "outputs": [], - "execution_count": 18 + ] }, { "cell_type": "code", + "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2024-12-10T17:42:23.135727Z", "start_time": "2024-12-10T17:42:23.132572Z" } }, + "outputs": [], "source": [ "# Define years, sessions, and events of interest\n", "years = [2021, 2022, 2023, 2024]\n", "sessions = ['Race']\n", "events = ['Bahrain Grand Prix', 'Saudi Arabian Grand Prix', 'Dutch Grand Prix', 'Italian Grand Prix', 'Austrian Grand Prix', 'Hungarian Grand Prix', 'British Grand Prix', 'Belgian Grand Prix', 'United States Grand Prix', 'Mexico City Grand Prix', 'Sao Paulo Grand Prix']" - ], - "outputs": [], - "execution_count": 19 + ] }, { "cell_type": "markdown", @@ -188,112 +188,14 @@ ] }, { + "cell_type": "code", + "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2024-12-10T17:45:57.601953Z", "start_time": "2024-12-10T17:42:24.338445Z" } }, - "cell_type": "code", - "source": [ - "# Get data from FastF1 API\n", - "\n", - "# Data containers\n", - "weather_data_list = []\n", - "lap_data_list = []\n", - "\n", - "# Loop through years and sessions\n", - "for year in years:\n", - " for event_name in events: \n", - " for session_name in sessions:\n", - " try:\n", - " print(f\"Processing {year} {event_name} - {session_name}\")\n", - " \n", - " # Load the session\n", - " session = fastf1.get_session(year, event_name, session_name, backend='fastf1')\n", - " session.load()\n", - " \n", - " # Process weather data\n", - " weather_data = session.weather_data\n", - " if weather_data is not None:\n", - " weather_df = pd.DataFrame(weather_data)\n", - " # Add context columns\n", - " weather_df['Year'] = year\n", - " weather_df['Event'] = event_name\n", - " weather_df['Session'] = session_name\n", - " weather_data_list.append(weather_df)\n", - "\n", - " # Process lap data\n", - " lap_data = session.laps\n", - " if lap_data is not None:\n", - " lap_df = pd.DataFrame(lap_data)\n", - " # Add context columns\n", - " lap_df['Year'] = year\n", - " lap_df['Event'] = event_name\n", - " lap_df['Session'] = session_name\n", - " # Ensure driver information is included\n", - " if 'Driver' not in lap_df.columns:\n", - " lap_df['Driver'] = lap_df['DriverNumber'].map(session.drivers)\n", - " # Add team information if available\n", - " if 'Team' not in lap_df.columns:\n", - " lap_df['Team'] = lap_df['Driver'].map(session.drivers_info['TeamName'])\n", - " lap_data_list.append(lap_df)\n", - " \n", - " except Exception as e:\n", - " print(f\"Error with {event_name} {session_name} ({year}): {e}\")\n", - "\n", - "# Combine data into DataFrames\n", - "if weather_data_list:\n", - " weather_data_combined = pd.concat(weather_data_list, ignore_index=True)\n", - " # Ensure consistent column ordering\n", - " weather_cols = ['Time', 'Year', 'Event', 'Session', \n", - " 'AirTemp', 'Humidity', 'Pressure', 'Rainfall', \n", - " 'TrackTemp', 'WindDirection', 'WindSpeed']\n", - " weather_data_combined = weather_data_combined[weather_cols]\n", - " \n", - "if lap_data_list:\n", - " lap_data_combined = pd.concat(lap_data_list, ignore_index=True)\n", - " # Ensure consistent column ordering\n", - " lap_cols = ['Time', 'Year', 'Event', 'Session', \n", - " 'Driver', 'Team', 'LapNumber', 'LapTime',\n", - " 'Sector1Time', 'Sector2Time', 'Sector3Time',\n", - " 'Compound', 'TyreLife', 'FreshTyre',\n", - " 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST']\n", - " # Only include columns that exist\n", - " existing_cols = [col for col in lap_cols if col in lap_data_combined.columns]\n", - " lap_data_combined = lap_data_combined[existing_cols]\n", - " \n", - "# Time conversion\n", - "# Function to convert timedelta to datetime\n", - "def convert_timedelta_to_datetime(df, base_date='2021-01-01'):\n", - " if 'Time' in df.columns:\n", - " # Create a base datetime and add the timedelta\n", - " base = pd.Timestamp(base_date)\n", - " if df['Time'].dtype == 'timedelta64[ns]':\n", - " df['Time'] = base + df['Time']\n", - " return df\n", - "\n", - "# Apply conversion to both dataframes\n", - "weather_data_combined = convert_timedelta_to_datetime(weather_data_combined)\n", - "lap_data_combined = convert_timedelta_to_datetime(lap_data_combined)\n", - "\n", - "# Remove missing values\n", - "weather_data_combined = weather_data_combined.dropna()\n", - "lap_data_combined = lap_data_combined.dropna()\n", - "\n", - "# Create a new column for lap time in seconds\n", - "lap_data_combined['LapTime_seconds'] = lap_data_combined['LapTime'].dt.total_seconds()\n", - "\n", - "# Merge the data\n", - "merged_data = pd.merge_asof(\n", - " lap_data_combined.sort_values('Time'),\n", - " weather_data_combined.sort_values('Time'),\n", - " on='Time',\n", - " by=['Event', 'Year'], # Match within same event and year\n", - " direction='nearest',\n", - " tolerance=pd.Timedelta('1 min') # Allow matching within 1 minute\n", - ")\n" - ], "outputs": [ { "name": "stderr", @@ -1604,7 +1506,105 @@ ] } ], - "execution_count": 20 + "source": [ + "# Get data from FastF1 API\n", + "\n", + "# Data containers\n", + "weather_data_list = []\n", + "lap_data_list = []\n", + "\n", + "# Loop through years and sessions\n", + "for year in years:\n", + " for event_name in events: \n", + " for session_name in sessions:\n", + " try:\n", + " print(f\"Processing {year} {event_name} - {session_name}\")\n", + " \n", + " # Load the session\n", + " session = fastf1.get_session(year, event_name, session_name, backend='fastf1')\n", + " session.load()\n", + " \n", + " # Process weather data\n", + " weather_data = session.weather_data\n", + " if weather_data is not None:\n", + " weather_df = pd.DataFrame(weather_data)\n", + " # Add context columns\n", + " weather_df['Year'] = year\n", + " weather_df['Event'] = event_name\n", + " weather_df['Session'] = session_name\n", + " weather_data_list.append(weather_df)\n", + "\n", + " # Process lap data\n", + " lap_data = session.laps\n", + " if lap_data is not None:\n", + " lap_df = pd.DataFrame(lap_data)\n", + " # Add context columns\n", + " lap_df['Year'] = year\n", + " lap_df['Event'] = event_name\n", + " lap_df['Session'] = session_name\n", + " # Ensure driver information is included\n", + " if 'Driver' not in lap_df.columns:\n", + " lap_df['Driver'] = lap_df['DriverNumber'].map(session.drivers)\n", + " # Add team information if available\n", + " if 'Team' not in lap_df.columns:\n", + " lap_df['Team'] = lap_df['Driver'].map(session.drivers_info['TeamName'])\n", + " lap_data_list.append(lap_df)\n", + " \n", + " except Exception as e:\n", + " print(f\"Error with {event_name} {session_name} ({year}): {e}\")\n", + "\n", + "# Combine data into DataFrames\n", + "if weather_data_list:\n", + " weather_data_combined = pd.concat(weather_data_list, ignore_index=True)\n", + " # Ensure consistent column ordering\n", + " weather_cols = ['Time', 'Year', 'Event', 'Session', \n", + " 'AirTemp', 'Humidity', 'Pressure', 'Rainfall', \n", + " 'TrackTemp', 'WindDirection', 'WindSpeed']\n", + " weather_data_combined = weather_data_combined[weather_cols]\n", + " \n", + "if lap_data_list:\n", + " lap_data_combined = pd.concat(lap_data_list, ignore_index=True)\n", + " # Ensure consistent column ordering\n", + " lap_cols = ['Time', 'Year', 'Event', 'Session', \n", + " 'Driver', 'Team', 'LapNumber', 'LapTime',\n", + " 'Sector1Time', 'Sector2Time', 'Sector3Time',\n", + " 'Compound', 'TyreLife', 'FreshTyre',\n", + " 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST']\n", + " # Only include columns that exist\n", + " existing_cols = [col for col in lap_cols if col in lap_data_combined.columns]\n", + " lap_data_combined = lap_data_combined[existing_cols]\n", + " \n", + "# Time conversion\n", + "# Function to convert timedelta to datetime\n", + "def convert_timedelta_to_datetime(df, base_date='2021-01-01'):\n", + " if 'Time' in df.columns:\n", + " # Create a base datetime and add the timedelta\n", + " base = pd.Timestamp(base_date)\n", + " if df['Time'].dtype == 'timedelta64[ns]':\n", + " df['Time'] = base + df['Time']\n", + " return df\n", + "\n", + "# Apply conversion to both dataframes\n", + "weather_data_combined = convert_timedelta_to_datetime(weather_data_combined)\n", + "lap_data_combined = convert_timedelta_to_datetime(lap_data_combined)\n", + "\n", + "# Remove missing values\n", + "weather_data_combined = weather_data_combined.dropna()\n", + "lap_data_combined = lap_data_combined.dropna()\n", + "\n", + "# Create a new column for lap time in seconds\n", + "lap_data_combined['LapTime_seconds'] = lap_data_combined['LapTime'].dt.total_seconds()\n", + "\n", + "# Merge the data\n", + "merged_data = pd.merge_asof(\n", + " lap_data_combined.sort_values('Time'),\n", + " weather_data_combined.sort_values('Time'),\n", + " on='Time',\n", + " by=['Event', 'Year'], # Match within same event and year\n", + " direction='nearest',\n", + " tolerance=pd.Timedelta('1 min') # Allow matching within 1 minute\n", + ")\n" + ] }, { "cell_type": "markdown", @@ -1633,48 +1633,16 @@ }, { "cell_type": "code", + "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2024-12-10T17:46:25.733104Z", "start_time": "2024-12-10T17:46:25.694313Z" } }, - "source": [ - "# Display a sample of the raw data\n", - "lap_data_combined.head()" - ], "outputs": [ { "data": { - "text/plain": [ - " Time Year Event Session Driver \\\n", - "1 2021-01-01 00:41:37.134 2021 Bahrain Grand Prix Race GAS \n", - "4 2021-01-01 00:48:28.044 2021 Bahrain Grand Prix Race GAS \n", - "5 2021-01-01 00:50:04.721 2021 Bahrain Grand Prix Race GAS \n", - "6 2021-01-01 00:51:41.675 2021 Bahrain Grand Prix Race GAS \n", - "8 2021-01-01 00:54:56.129 2021 Bahrain Grand Prix Race GAS \n", - "\n", - " Team LapNumber LapTime Sector1Time \\\n", - "1 AlphaTauri 2.0 0 days 00:02:22.263000 0 days 00:00:45.220000 \n", - "4 AlphaTauri 5.0 0 days 00:02:11.534000 0 days 00:01:05.748000 \n", - "5 AlphaTauri 6.0 0 days 00:01:36.677000 0 days 00:00:30.990000 \n", - "6 AlphaTauri 7.0 0 days 00:01:36.954000 0 days 00:00:31.176000 \n", - "8 AlphaTauri 9.0 0 days 00:01:37.030000 0 days 00:00:31.256000 \n", - "\n", - " Sector2Time Sector3Time Compound TyreLife FreshTyre \\\n", - "1 0 days 00:01:00.086000 0 days 00:00:36.957000 MEDIUM 5.0 False \n", - "4 0 days 00:00:41.956000 0 days 00:00:23.830000 HARD 1.0 True \n", - "5 0 days 00:00:41.802000 0 days 00:00:23.885000 HARD 2.0 True \n", - "6 0 days 00:00:41.678000 0 days 00:00:24.100000 HARD 3.0 True \n", - "8 0 days 00:00:41.911000 0 days 00:00:23.863000 HARD 5.0 True \n", - "\n", - " SpeedI1 SpeedI2 SpeedFL SpeedST LapTime_seconds \n", - "1 120.0 134.0 182.0 236.0 142.263 \n", - "4 231.0 251.0 275.0 213.0 131.534 \n", - "5 233.0 254.0 275.0 280.0 96.677 \n", - "6 232.0 252.0 274.0 282.0 96.954 \n", - "8 234.0 248.0 276.0 286.0 97.030 " - ], "text/html": [ "
\n", "