@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 176 ,
"execution_count": 2 ,
"id": "c0f0ed8f",
"metadata": {},
"outputs": [],
@ -35,7 +35,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"df = pd.read_csv(\"carDetailsV4 .csv\", encoding=\"latin-1\")"
"df = pd.read_csv(\"carDetailsOld .csv\", encoding=\"latin-1\")"
]
},
{
@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 177 ,
"execution_count": 3 ,
"id": "65ea7cfb",
"metadata": {},
"outputs": [
@ -410,7 +410,7 @@
"[2059 rows x 20 columns]"
]
},
"execution_count": 177 ,
"execution_count": 3 ,
"metadata": {},
"output_type": "execute_result"
}
@ -423,7 +423,7 @@
},
{
"cell_type": "code",
"execution_count": 178 ,
"execution_count": 4 ,
"id": "d846d8e4",
"metadata": {},
"outputs": [
@ -452,7 +452,7 @@
},
{
"cell_type": "code",
"execution_count": 179 ,
"execution_count": 5 ,
"id": "2aea6e9f",
"metadata": {},
"outputs": [
@ -503,7 +503,7 @@
},
{
"cell_type": "code",
"execution_count": 180 ,
"execution_count": 6 ,
"id": "b7406055",
"metadata": {},
"outputs": [
@ -558,151 +558,30 @@
},
{
"cell_type": "code",
"execution_count": 181 ,
"execution_count": 10 ,
"id": "c068815f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Make</th>\n",
" <th>Model</th>\n",
" <th>Price</th>\n",
" <th>Year</th>\n",
" <th>Kilometer</th>\n",
" <th>Fuel Type</th>\n",
" <th>Transmission</th>\n",
" <th>Location</th>\n",
" <th>Color</th>\n",
" <th>Owner</th>\n",
" <th>Seller Type</th>\n",
" <th>Engine</th>\n",
" <th>Max Power</th>\n",
" <th>Max Torque</th>\n",
" <th>Drivetrain</th>\n",
" <th>Length</th>\n",
" <th>Width</th>\n",
" <th>Height</th>\n",
" <th>Seating Capacity</th>\n",
" <th>Fuel Tank Capacity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Hyundai</td>\n",
" <td>Creta 1.6 SX Plus AT</td>\n",
" <td>925000</td>\n",
" <td>2016</td>\n",
" <td>66000</td>\n",
" <td>Diesel</td>\n",
" <td>Automatic</td>\n",
" <td>Raipur</td>\n",
" <td>Black</td>\n",
" <td>First</td>\n",
" <td>Individual</td>\n",
" <td>1582 cc</td>\n",
" <td>126 bhp @ 4000 rpm</td>\n",
" <td>265 Nm @ 1900 rpm</td>\n",
" <td>FWD</td>\n",
" <td>4270.0</td>\n",
" <td>1780.0</td>\n",
" <td>1630.0</td>\n",
" <td>5.0</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Ford</td>\n",
" <td>Ecosport Titanium+ 1.0L EcoBoost</td>\n",
" <td>535000</td>\n",
" <td>2015</td>\n",
" <td>28000</td>\n",
" <td>Petrol</td>\n",
" <td>Manual</td>\n",
" <td>Mumbai</td>\n",
" <td>Silver</td>\n",
" <td>First</td>\n",
" <td>Individual</td>\n",
" <td>999 cc</td>\n",
" <td>124 bhp @ 6000 rpm</td>\n",
" <td>170 Nm @ 1400 rpm</td>\n",
" <td>FWD</td>\n",
" <td>3999.0</td>\n",
" <td>1765.0</td>\n",
" <td>1708.0</td>\n",
" <td>5.0</td>\n",
" <td>52.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Hyundai</td>\n",
" <td>Santro GL (CNG)</td>\n",
" <td>145000</td>\n",
" <td>2009</td>\n",
" <td>72000</td>\n",
" <td>CNG</td>\n",
" <td>Manual</td>\n",
" <td>Kanpur</td>\n",
" <td>Silver</td>\n",
" <td>Second</td>\n",
" <td>Individual</td>\n",
" <td>1086 cc</td>\n",
" <td>62 bhp @ 5500 rpm</td>\n",
" <td>96 Nm @ 3000 rpm</td>\n",
" <td>FWD</td>\n",
" <td>3565.0</td>\n",
" <td>1525.0</td>\n",
" <td>1590.0</td>\n",
" <td>5.0</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Make Model Price Year Kilometer \\\n",
"31 Hyundai Creta 1.6 SX Plus AT 925000 2016 66000 \n",
"32 Ford Ecosport Titanium+ 1.0L EcoBoost 535000 2015 28000 \n",
"34 Hyundai Santro GL (CNG) 145000 2009 72000 \n",
"\n",
" Fuel Type Transmission Location Color Owner Seller Type Engine \\\n",
"31 Diesel Automatic Raipur Black First Individual 1582 cc \n",
"32 Petrol Manual Mumbai Silver First Individual 999 cc \n",
"34 CNG Manual Kanpur Silver Second Individual 1086 cc \n",
"\n",
" Max Power Max Torque Drivetrain Length Width Height \\\n",
"31 126 bhp @ 4000 rpm 265 Nm @ 1900 rpm FWD 4270.0 1780.0 1630.0 \n",
"32 124 bhp @ 6000 rpm 170 Nm @ 1400 rpm FWD 3999.0 1765.0 1708.0 \n",
"34 62 bhp @ 5500 rpm 96 Nm @ 3000 rpm FWD 3565.0 1525.0 1590.0 \n",
"\n",
" Seating Capacity Fuel Tank Capacity \n",
"31 5.0 60.0 \n",
"32 5.0 52.0 \n",
"34 5.0 35.0 "
]
},
"metadata": {},
"output_type": "display_data"
"ename": "KeyError",
"evalue": "'Color'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3620\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3621\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mget_loc(casted_key)\n\u001b[1;32m 3622\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n",
"File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/pandas/_libs/index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/pandas/_libs/index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'Color'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/UCA/anperederi/SAE/SAE2.04-Exploitation_d_une_base_de_donnees/csvCleaner.ipynb Cellule 12\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/UCA/anperederi/SAE/SAE2.04-Exploitation_d_une_base_de_donnees/csvCleaner.ipynb#X55sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mdel\u001b[39;00m df[\u001b[39m\"\u001b[39m\u001b[39mColor\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m <a href='vscode-notebook-cell:/home/UCA/anperederi/SAE/SAE2.04-Exploitation_d_une_base_de_donnees/csvCleaner.ipynb#X55sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mdel\u001b[39;00m df[\u001b[39m'\u001b[39m\u001b[39mLocation\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m <a href='vscode-notebook-cell:/home/UCA/anperederi/SAE/SAE2.04-Exploitation_d_une_base_de_donnees/csvCleaner.ipynb#X55sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(\u001b[39m\"\u001b[39m\u001b[39mcarDetailsOld.csv\u001b[39m\u001b[39m\"\u001b[39m, encoding\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mlatin-1\u001b[39m\u001b[39m\"\u001b[39m)\n",
"File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/pandas/core/generic.py:4048\u001b[0m, in \u001b[0;36mNDFrame.__delitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4043\u001b[0m deleted \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 4044\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m deleted:\n\u001b[1;32m 4045\u001b[0m \u001b[39m# If the above loop ran and didn't delete anything because\u001b[39;00m\n\u001b[1;32m 4046\u001b[0m \u001b[39m# there was no match, this call should raise the appropriate\u001b[39;00m\n\u001b[1;32m 4047\u001b[0m \u001b[39m# exception:\u001b[39;00m\n\u001b[0;32m-> 4048\u001b[0m loc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49maxes[\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m]\u001b[39m.\u001b[39;49mget_loc(key)\n\u001b[1;32m 4049\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mgr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_mgr\u001b[39m.\u001b[39midelete(loc)\n\u001b[1;32m 4051\u001b[0m \u001b[39m# delete from the caches\u001b[39;00m\n",
"File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3621\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine\u001b[39m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3622\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m-> 3623\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m 3624\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 3625\u001b[0m \u001b[39m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3626\u001b[0m \u001b[39m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3627\u001b[0m \u001b[39m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3628\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[0;31mKeyError\u001b[0m: 'Color'"
]
}
],
"source": [
@ -710,7 +589,7 @@
"del df[\"Color\"]\n",
"del df['Location']\n",
"\n",
"df = pd.read_csv(\"carDetailsV4 .csv\", encoding=\"latin-1\")\n",
"df = pd.read_csv(\"carDetailsOld .csv\", encoding=\"latin-1\")\n",
"df=df.dropna(axis=0)\n",
"\n",
"#Permet d'afficher le dataframe\n",
@ -726,32 +605,10 @@
},
{
"cell_type": "code",
"execution_count": 182 ,
"execution_count": null ,
"id": "69d69464",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1198\n",
"1 1248\n",
"2 1197\n",
"3 1197\n",
"4 2393\n",
" ... \n",
"2053 1197\n",
"2054 2179\n",
"2055 814\n",
"2056 1196\n",
"2057 1995\n",
"Name: Engine, Length: 1874, dtype: object"
]
},
"execution_count": 182,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df1[\"Engine\"]"
]
@ -761,37 +618,17 @@
"execution_count": null,
"id": "ee792795",
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mLe kernel n’ a pas pu démarrer en raison de l''pygments.formatters' de module manquant. Envisagez d’ installer ce module.\n",
"\u001b[1;31mCliquez sur <a href='https://aka.ms/kernelFailuresMissingModule'>ici</a> pour plus d’ informations."
]
}
],
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 184 ,
"execution_count": null,
"id": "6704d8d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Empty DataFrame\n",
"Columns: [Make, Model, Price, Year, Kilometer, Fuel Type, Transmission, Location, Color, Owner, Seller Type, Engine, Max Power, Max Torque, Drivetrain, Length, Width, Height, Seating Capacity, Fuel Tank Capacity]\n",
"Index: []\n"
]
}
],
"outputs": [],
"source": [
"print(df[df['Engine']==''])"
]
@ -873,7 +710,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "a3bf3a6f",
"metadata": {},
@ -903,7 +739,7 @@
},
{
"cell_type": "code",
"execution_count": 186 ,
"execution_count": null ,
"id": "1b0173e3",
"metadata": {},
"outputs": [
@ -955,12 +791,27 @@
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mLe kernel n’ a pas pu démarrer en raison de l''pygments.formatters' de module manquant. Envisagez d’ installer ce module.\n",
"\u001b[1;31mCliquez sur <a href='https://aka.ms/kernelFailuresMissingModule'>ici</a> pour plus d’ informations."
"name": "stdout",
"output_type": "stream",
"text": [
"Prix moyen d'une Audi : 2703134 ₹\n",
"\n",
"Prix moyen d'une BMW : 3768967 ₹\n",
"\n",
"En moyenne, les BMW sont plus chers que les Audi\n",
"\n",
"Année moyenne d'un Audi : 2016 \n",
"\n",
"Année moyenne d'un BMW : 2017 \n",
"\n",
"La BMW est plus récente que l'Audi en moyenne.\n",
"\n",
"Kilométrage moyen d'un Audi : 54319 km\n",
"\n",
"Kilométrage moyen d'un BMW : 50453 km\n",
"\n",
"En moyenne,l'Audi a plus de kilomètres que le BMW\n",
"\n"
]
}
],
@ -997,13 +848,13 @@
"# print(vehicule1[\"Engine\"])\n",
"\n",
"# print(\"Puissance moyenne d'un Audi : \", int(vehicule1[\"Engine\"].mean().round()), \"ch\\n\")\n",
"print(\"Puissance moyenne d'un BMW : \", float(vehicule2[\"Engine\"].mean().round()), \"ch\\n\")\n",
"print(\"ligne : \", df[\"Engine\"])\n",
"# print(\"Puissance moyenne d'un BMW : \", float(vehicule2[\"Engine\"].mean().round()), \"ch\\n\")\n",
"# print(\"ligne : \", df[\"Engine\"])\n",
"\n",
"if vehicule1[\"Engine\"].mean() > vehicule2[\"Engine\"].mean():\n",
" print(\"En moyenne, l'Audi a plus de puissance que le BMW\\n\")\n",
"else:\n",
" print(\"En moyenne, la BMW a plus de puissance que l'Audi\\n\")"
"# if vehicule1[\"Engine\"].mean() > vehicule2[\"Engine\"].mean():\n",
"# print(\"En moyenne, l'Audi a plus de puissance que le BMW\\n\")\n",
"# else:\n",
"# print(\"En moyenne, la BMW a plus de puissance que l'Audi\\n\")"
]
},
{
@ -1016,39 +867,19 @@
},
{
"cell_type": "code",
"execution_count": 1 ,
"execution_count": null ,
"id": "dc0a7b57",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/UCA/anperederi/IUT/Maths/tp/Stats/tp4/TP4.ipynb Cellule 39\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/home/UCA/anperederi/IUT/Maths/tp/Stats/tp4/TP4.ipynb#X53sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Export moi le dataframe en csv\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/home/UCA/anperederi/IUT/Maths/tp/Stats/tp4/TP4.ipynb#X53sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m df\u001b[39m.\u001b[39mto_csv(\u001b[39m'\u001b[39m\u001b[39mcarDetailsV5.csv\u001b[39m\u001b[39m'\u001b[39m, index\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
]
}
],
"outputs": [],
"source": [
"# Export moi le dataframe en csv\n",
"df.to_csv('carDetailsV5 .csv', index=False)\n"
"df.to_csv('carDetails.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e3e89ea",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel) ",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -1062,7 +893,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11 .2"
"version": "3.9 .2"
}
},
"nbformat": 4,