You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.3 KiB
94 lines
3.3 KiB
import pandas as pd
|
|
import psycopg2 as psy
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from getpass import getpass
|
|
|
|
# Load the raw sales data. The path is relative, so the script has to be run
# from the directory that contains vgsales.csv.
data = pd.read_csv(r'vgsales.csv')
df = pd.DataFrame(data)

# Columns written to the database, in the column order of the Formule table.
# They are selected explicitly so that any extra CSV column is ignored.
FORMULE_COLUMNS = ['Name', 'Platform', 'Year', 'Genre', 'Publisher',
                   'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales',
                   'Global_Sales']

# Rows whose regional sales do not add up to Global_Sales (filled in step 5).
inconsistent_rows = []
# Cleaned table content read back from the database (filled in step 7).
clean_df = None

co = None
try:
    co = psy.connect(host='londres',
                     database='dbanperederi',
                     user='anperederi',
                     password=getpass())

    # The connection context commits when the block succeeds and rolls back
    # when it raises; the cursor context closes the cursor in both cases.
    # Neither of them closes the connection, hence the `finally` below.
    with co, co.cursor() as cur:
        # 1/2. (Re)create the target table from scratch.
        cur.execute('''DROP TABLE IF EXISTS Formule ;''')
        cur.execute('''CREATE TABLE Formule (
            Name varchar(150),
            Platform varchar,
            Year numeric,
            Genre varchar,
            Publisher varchar,
            NA_Sales numeric,
            EU_Sales numeric,
            JP_Sales numeric,
            Other_Sales numeric,
            Global_Sales numeric,
            PRIMARY KEY (Name, Platform)
        );''')

        # 3. Remove duplicates on the primary-key columns.
        df = pd.DataFrame(data).drop_duplicates(subset=['Name', 'Platform'])

        # 4. Insert the rows.
        # Rows with a missing key part cannot satisfy the primary key, so
        # they are skipped instead of aborting the whole load.
        insertable = df.dropna(subset=['Name', 'Platform'])
        # Missing CSV values are read by pandas as NaN; convert them to None
        # so that they are stored as SQL NULL rather than as NaN values.
        rows = [
            tuple(None if pd.isna(value) else value for value in record)
            for record in insertable[FORMULE_COLUMNS].itertuples(index=False,
                                                                 name=None)
        ]
        cur.executemany(
            '''INSERT INTO Formule
                   (Name, Platform, Year, Genre, Publisher,
                    NA_Sales, EU_Sales, JP_Sales, Other_Sales, Global_Sales)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);''',
            rows)

        # 5. Look for inconsistent rows: the regional sales should add up to
        # the global sales. A tolerance is used because the figures are
        # rounded decimal numbers.
        cur.execute("""SELECT Name, Platform,
                              ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2),
                              Global_Sales
                       FROM Formule
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1""")
        inconsistent_rows = cur.fetchall()

        # for row in inconsistent_rows:
        #     print(row)

        # 6. Recompute the global sales for the inconsistent rows.
        cur.execute("""UPDATE Formule
                       SET Global_Sales = ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2)
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1;""")

        # 7. Build the clean DataFrame from the corrected table.
        # Column labels come from the cursor description, i.e. they are the
        # names as reported by the server (PostgreSQL folds unquoted
        # identifiers to lower case).
        cur.execute("SELECT * FROM Formule;")
        clean_df = pd.DataFrame(cur.fetchall(),
                                columns=[column[0] for column in cur.description])

        # NOTE(review): the disabled analysis below queries a table named
        # plain_jeu, which this script does not create.

        # # 1. Average sales per release year for the adventure genre
        # df = pd.read_sql("""SELECT annee_sortie, AVG(ventes_total) ventes_moyennes, SUM(ventes_total) ventes_totales FROM plain_jeu
        #                     WHERE genre = 'Adventure' AND annee_sortie IS NOT NULL
        #                     GROUP BY annee_sortie
        #                     ORDER BY annee_sortie;""", con=co)

        # # 2/3. Display as a line chart
        # fig = df.plot(x='annee_sortie', y=['ventes_moyennes', 'ventes_totales'])
        # fig.legend(['Ventes moyennes', 'Ventes totales'])
        # fig.set_xlabel('Année de sortie')
        # fig.set_ylabel('Ventes (en millions)')

        # # 4. Total sales for all games
        # df = pd.read_sql("""SELECT annee_sortie, genre, SUM(ventes_total) ventes_totales FROM plain_jeu
        #                     WHERE annee_sortie IS NOT NULL
        #                     GROUP BY annee_sortie, genre
        #                     ORDER BY annee_sortie;""", con=co)
        # fig, ax = plt.subplots()
        # for i, (label, group) in enumerate(df.groupby(['genre'])):
        #     ax = group.plot(ax=ax, kind='line', x='annee_sortie',
        #                     y='ventes_totales', label=label, color=plt.cm.tab20.colors[i])
        # ax.set_xlabel('Année de sortie')
        # ax.set_ylabel('Ventes (en millions)')

except Exception as error:
    # Best-effort script: report the failure instead of raising.
    # psy.DatabaseError is a subclass of Exception, so this single clause
    # catches everything the original two-element tuple did.
    print(error)
finally:
    if co is not None:
        co.close()