diff --git a/project/Final_Report.ipynb b/project/Final_Report.ipynb index 09071c6..94c0e3a 100644 --- a/project/Final_Report.ipynb +++ b/project/Final_Report.ipynb @@ -50,13 +50,30 @@ ] }, { + "cell_type": "code", + "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T15:24:16.987194Z", "start_time": "2024-12-09T15:24:16.974515Z" } }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python version: 3.10.13 (main, Sep 11 2023, 08:24:56) [Clang 14.0.6 ]\n", + "Pandas version: 2.2.2\n", + "Numpy version: 1.23.5\n", + "Matplotlib version: 3.8.4\n", + "Seaborn version: 0.13.2\n", + "FastF1 version: 3.4.4\n", + "Scikit-learn version: 1.5.1\n", + "XGBoost version: 2.1.1\n" + ] + } + ], "source": [ "# Importing Libraries\n", "import sys\n", @@ -90,28 +107,11 @@ "print(f'FastF1 version: {fastf1.__version__}')\n", "print(f'Scikit-learn version: {sys.modules[\"sklearn\"].__version__}')\n", "print(f'XGBoost version: {xgb.__version__}')" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python version: 3.10.13 (main, Sep 11 2023, 08:24:56) [Clang 14.0.6 ]\n", - "Pandas version: 2.2.2\n", - "Numpy version: 1.23.5\n", - "Matplotlib version: 3.8.4\n", - "Seaborn version: 0.13.2\n", - "FastF1 version: 3.4.4\n", - "Scikit-learn version: 1.5.1\n", - "XGBoost version: 2.1.1\n" - ] - } - ], - "execution_count": 7 + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## Problem Statement\n", "We are analyzing Formula One driver performance to understand and predict race outcomes based on various conditions. Specifically, we aim to:\n", @@ -123,13 +123,15 @@ ] }, { + "cell_type": "code", + "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T15:24:17.026610Z", "start_time": "2024-12-09T15:24:17.013465Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "# Set up FastF1 plotting and caching\n", "cache_dir = '../data/cache'\n", @@ -138,30 +140,28 @@ "\n", "fastf1.Cache.enable_cache(cache_dir)\n", "fastf1.plotting.setup_mpl(misc_mpl_mods=False, color_scheme=None)" - ], - "outputs": [], - "execution_count": 8 + ] }, { + "cell_type": "code", + "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T15:24:17.053839Z", "start_time": "2024-12-09T15:24:17.050836Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "# Define years, sessions, and events of interest\n", "years = [2021, 2022, 2023, 2024]\n", "sessions = ['Race']\n", - "events = ['Bahrain Grand Prix', 'British Grand Prix', 'United States Grand Prix', 'Mexico City Grand Prix', 'São Paulo Grand Prix'] " - ], - "outputs": [], - "execution_count": 9 + "events = ['Bahrain Grand Prix', 'British Grand Prix', 'Belgian Grand Prix', 'United States Grand Prix', 'Mexico City Grand Prix'] " + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## Why these events, sessions, and years?\n", "\n", @@ -170,9 +170,9 @@ "Each event has a specific set of conditions that may affect driver performance:\n", "- Bahrain: Hot and humid, with high track temperatures\n", "- British: Cool and changeable, with frequent rain\n", + "- Belgian: Overcast and cool, with frequent weather changes\n", "- United States: Very hot, with high track temperatures\n", "- Mexico City: Cool and changeable, with frequent rain\n", - "- São Paulo: Hot and humid, with high track temperatures and heavy rain\n", "\n", "As for years, we chose 2021 to 2024 because they are the most recent years for which data is available. In 2021, the regulations changed to allow for more overtaking, so the lap times became incomparable to that of previous years.\n", "\n", @@ -180,13 +180,43 @@ ] }, { + "cell_type": "code", + "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T15:24:20.703407Z", "start_time": "2024-12-09T15:24:17.065829Z" } }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing 2021 Bahrain Grand Prix - Race\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "core INFO \tLoading data for Bahrain Grand Prix - Race [v3.4.4]\n", + "req INFO \tUsing cached data for session_info\n", + "req INFO \tUsing cached data for driver_info\n", + "req INFO \tUsing cached data for session_status_data\n", + "req INFO \tUsing cached data for lap_count\n", + "req INFO \tUsing cached data for track_status_data\n", + "req INFO \tUsing cached data for _extended_timing_data\n", + "req INFO \tUsing cached data for timing_app_data\n", + "core INFO \tProcessing timing data...\n", + "req INFO \tUsing cached data for car_data\n", + "req INFO \tUsing cached data for position_data\n", + "req INFO \tUsing cached data for weather_data\n", + "req INFO \tUsing cached data for race_control_messages\n", + "core INFO \tFinished loading data for 20 drivers: ['44', '33', '77', '4', '11', '16', '3', '55', '22', '18', '7', '99', '31', '63', '5', '47', '10', '6', '14', '9']\n" + ] + } + ], "source": [ "# Get data from FastF1 API\n", "\n", @@ -290,41 +320,11 @@ " direction='nearest',\n", " tolerance=pd.Timedelta('1 min') # Allow matching within 1 minute\n", ")" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing 2021 Bahrain Grand Prix - Race\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "core INFO \tLoading data for Bahrain Grand Prix - Race [v3.4.4]\n", - "req INFO \tUsing cached data for session_info\n", - "req INFO \tUsing cached data for driver_info\n", - "req INFO \tUsing cached data for session_status_data\n", - "req INFO \tUsing cached data for lap_count\n", - "req INFO \tUsing cached data for track_status_data\n", - "req INFO \tUsing cached data for _extended_timing_data\n", - "req INFO \tUsing cached data for timing_app_data\n", - "core INFO \tProcessing timing data...\n", - "req INFO \tUsing cached data for car_data\n", - "req INFO \tUsing cached data for position_data\n", - "req INFO \tUsing cached data for weather_data\n", - "req INFO \tUsing cached data for race_control_messages\n", - "core INFO \tFinished loading data for 20 drivers: ['44', '33', '77', '4', '11', '16', '3', '55', '22', '18', '7', '99', '31', '63', '5', '47', '10', '6', '14', '9']\n" - ] - } - ], - "execution_count": 10 + ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "## Data Description\n", "Our data comes from the FastF1 API, which provides detailed Formula One racing data. Each observation represents a single lap during a race or qualifying session, including:\n", @@ -348,49 +348,17 @@ ] }, { + "cell_type": "code", + "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T15:25:18.878045Z", "start_time": "2024-12-09T15:25:18.858848Z" } }, - "cell_type": "code", - "source": [ - "# Display a sample of the raw data\n", - "lap_data_combined.head()" - ], "outputs": [ { "data": { - "text/plain": [ - " Time Year Event Session Driver \\\n", - "1 2021-01-01 00:41:37.134 2021 Bahrain Grand Prix Race GAS \n", - "4 2021-01-01 00:48:28.044 2021 Bahrain Grand Prix Race GAS \n", - "5 2021-01-01 00:50:04.721 2021 Bahrain Grand Prix Race GAS \n", - "6 2021-01-01 00:51:41.675 2021 Bahrain Grand Prix Race GAS \n", - "8 2021-01-01 00:54:56.129 2021 Bahrain Grand Prix Race GAS \n", - "\n", - " Team LapNumber LapTime Sector1Time \\\n", - "1 AlphaTauri 2.0 0 days 00:02:22.263000 0 days 00:00:45.220000 \n", - "4 AlphaTauri 5.0 0 days 00:02:11.534000 0 days 00:01:05.748000 \n", - "5 AlphaTauri 6.0 0 days 00:01:36.677000 0 days 00:00:30.990000 \n", - "6 AlphaTauri 7.0 0 days 00:01:36.954000 0 days 00:00:31.176000 \n", - "8 AlphaTauri 9.0 0 days 00:01:37.030000 0 days 00:00:31.256000 \n", - "\n", - " Sector2Time Sector3Time Compound TyreLife FreshTyre \\\n", - "1 0 days 00:01:00.086000 0 days 00:00:36.957000 MEDIUM 5.0 False \n", - "4 0 days 00:00:41.956000 0 days 00:00:23.830000 HARD 1.0 True \n", - "5 0 days 00:00:41.802000 0 days 00:00:23.885000 HARD 2.0 True \n", - "6 0 days 00:00:41.678000 0 days 00:00:24.100000 HARD 3.0 True \n", - "8 0 days 00:00:41.911000 0 days 00:00:23.863000 HARD 5.0 True \n", - "\n", - " SpeedI1 SpeedI2 SpeedFL SpeedST LapTime_seconds \n", - "1 120.0 134.0 182.0 236.0 142.263 \n", - "4 231.0 251.0 275.0 213.0 131.534 \n", - "5 233.0 254.0 275.0 280.0 96.677 \n", - "6 232.0 252.0 274.0 282.0 96.954 \n", - "8 234.0 248.0 276.0 286.0 97.030 " - ], "text/html": [ "
\n", "