parent
a97bbbbd70
commit
f41dd6f5c4
@ -1,4 +1,94 @@
|
||||
import pandas as pd
|
||||
# Load the video-game sales dataset from the current working directory.
data = pd.read_csv(
    r'vgsales.csv',
)

# Wrap the parsed table in a DataFrame and echo it on standard output.
df = pd.DataFrame(data)
print(df)
|
||||
import pandas as pd
|
||||
import psycopg2 as psy
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from getpass import getpass
|
||||
|
||||
# Load the raw sales data; read_csv already returns a DataFrame, so no extra
# pd.DataFrame(...) wrapper is needed.
data = pd.read_csv(r'vgsales.csv')
df = data

co = None
try:
    co = psy.connect(host='londres',
                     database='dbanperederi',
                     user='anperederi',
                     password=getpass())

    # The context manager closes the cursor even if one of the statements fails.
    with co.cursor() as cur:
        # 2. (Re)create the table from scratch on every run.
        cur.execute('''DROP TABLE IF EXISTS Formule ;''')
        cur.execute('''CREATE TABLE Formule (
            Name varchar(150),
            Platform varchar,
            Year numeric ,
            Genre varchar,
            Publisher varchar,
            NA_Sales numeric,
            EU_Sales numeric,
            JP_Sales numeric,
            Other_Sales numeric,
            Global_Sales numeric,
            PRIMARY KEY (Name, Platform)
        );''')

        # 3. Remove duplicates: (Name, Platform) is the primary key, so only
        # the first row of each pair may be inserted.
        df = data.drop_duplicates(subset=['Name', 'Platform'])

        # A missing year is sent as NULL instead of NaN so that SQL filters
        # such as "Year IS NOT NULL" behave as expected.
        cur.executemany(
            '''INSERT INTO Formule VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);''',
            [(row.Name, row.Platform,
              None if pd.isnull(row.Year) else row.Year,
              row.Genre, row.Publisher,
              row.NA_Sales, row.EU_Sales, row.JP_Sales,
              row.Other_Sales, row.Global_Sales)
             for row in df.itertuples()])

        # 5. Look for inconsistent rows: the regional sales should add up to
        # the global sales (compared with a tolerance of 0.1).
        cur.execute("""SELECT Name, Platform, ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2), Global_Sales
                       FROM Formule
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1""")
        # Kept for inspection; the original script did not print these rows.
        incoherent_rows = cur.fetchall()

        # 6. Recompute the global sales of the inconsistent rows.
        cur.execute("""UPDATE Formule
                       SET Global_Sales = ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2)
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1;""")

        # 7. Build the clean DataFrame from the corrected table. The result
        # of this query used to be discarded.
        cur.execute("SELECT * FROM Formule;")
        df = pd.DataFrame(cur.fetchall(),
                          columns=[column[0] for column in cur.description])

    co.commit()

except Exception as error:
    # psy.DatabaseError is a subclass of Exception, so one clause covers both.
    print(error)
finally:
    # Always release the connection, whether or not the script succeeded.
    if co is not None:
        co.close()
|
@ -0,0 +1,237 @@
|
||||
import pandas
|
||||
import psycopg2 # pip3 install types-psycopg2
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from getpass import getpass
|
||||
|
||||
##########
|
||||
# Partie 1
|
||||
##########
|
||||
|
||||
def part1_create_table(connection: "psycopg2.extensions.connection", filename: str) -> pandas.DataFrame:
    """Create and fill the ``plain_jeu`` table from a CSV file, then fix its totals.

    Steps, in order: create the table if it does not exist, load ``filename``,
    drop rows sharing the same (Name, Platform) pair, insert the remaining
    rows, print the rows whose regional sales do not add up to the global
    sales (tolerance 0.1), recompute the global sales of those rows, commit,
    and read the whole table back.

    The annotation is quoted because psycopg2 documents the connection class
    as ``psycopg2.extensions.connection``; ``psycopg2.connection`` is only
    known to the type stubs.

    :param connection: open psycopg2 connection used for every statement.
    :param filename: path of the CSV file; it must provide the columns Name,
        Platform, Year, Genre, Publisher, NA_Sales, EU_Sales, JP_Sales,
        Other_Sales and Global_Sales.
    :return: DataFrame holding the content of ``plain_jeu`` after correction.
    """
    # The context manager closes the cursor even if a statement raises.
    with connection.cursor() as cur:
        # 2. Table creation
        cur.execute("""CREATE TABLE IF NOT EXISTS plain_jeu (
            nom VARCHAR(150) NOT NULL,
            plateforme VARCHAR(10) NOT NULL,
            annee_sortie INTEGER,
            genre VARCHAR(20) NOT NULL,
            editeur VARCHAR(40) NOT NULL,
            ventes_na NUMERIC(6, 2) NOT NULL DEFAULT 0,
            ventes_ue NUMERIC(6, 2) NOT NULL DEFAULT 0,
            ventes_jp NUMERIC(6, 2) NOT NULL DEFAULT 0,
            ventes_autre NUMERIC(6, 2) NOT NULL DEFAULT 0,
            ventes_total NUMERIC(6, 2) NOT NULL DEFAULT 0, -- Redondant vu que devrait être le total des autres colonnes des ventes
            PRIMARY KEY(nom, plateforme)
        )""")

        # 3. Duplicate removal: (Name, Platform) maps to the primary key.
        # read_csv already returns a DataFrame.
        df = pandas.read_csv(filename).drop_duplicates(subset=['Name', 'Platform'])

        # 2. Insertions. A missing year is stored as NULL; otherwise it is
        # converted to int to fit the INTEGER column.
        cur.executemany(
            "INSERT INTO plain_jeu VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
            [(row.Name, row.Platform,
              int(row.Year) if not pandas.isnull(row.Year) else None,
              row.Genre, row.Publisher, row.NA_Sales, row.EU_Sales,
              row.JP_Sales, row.Other_Sales, row.Global_Sales)
             for row in df.itertuples()])

        # 5. Search for inconsistent rows (compared with a tolerance)
        cur.execute("""SELECT nom, plateforme, ROUND((ventes_na + ventes_ue + ventes_jp + ventes_autre)::numeric, 2), ventes_total
                       FROM plain_jeu
                       WHERE ABS(ventes_na + ventes_ue + ventes_jp + ventes_autre - ventes_total) > 0.1""")
        for row in cur.fetchall():
            print(row)

        # 6. Recompute the total sales of the inconsistent rows
        cur.execute("""UPDATE plain_jeu
                       SET ventes_total = ROUND((ventes_na + ventes_ue + ventes_jp + ventes_autre)::numeric, 2)
                       WHERE ABS(ventes_na + ventes_ue + ventes_jp + ventes_autre - ventes_total) > 0.1;""")

    connection.commit()

    # 7. Creation of the clean DataFrame
    return pandas.read_sql("SELECT * FROM plain_jeu;", con=connection)
|
||||
|
||||
|
||||
##########
|
||||
# Partie 2
|
||||
##########
|
||||
|
||||
def part2_graphs(connection: "psycopg2.extensions.connection"):
    """Plot yearly sales curves read from the ``plain_jeu`` table.

    Two figures are created: the average and total sales per release year for
    the 'Adventure' genre, and the total sales per release year with one line
    per genre. Nothing is displayed here; the caller is expected to call
    ``plt.show()``.

    The annotation is quoted because psycopg2 documents the connection class
    as ``psycopg2.extensions.connection``; ``psycopg2.connection`` is only
    known to the type stubs.

    :param connection: open psycopg2 connection used to run the queries.
    """
    # 1. Average and total sales per release year for the adventure genre
    df = pandas.read_sql("""SELECT annee_sortie, AVG(ventes_total) ventes_moyennes, SUM(ventes_total) ventes_totales FROM plain_jeu
                            WHERE genre = 'Adventure' AND annee_sortie IS NOT NULL
                            GROUP BY annee_sortie
                            ORDER BY annee_sortie;""", con=connection)

    # 2/3. Display as curves (DataFrame.plot returns a matplotlib Axes)
    ax = df.plot(x='annee_sortie', y=['ventes_moyennes', 'ventes_totales'])
    ax.legend(['Ventes moyennes', 'Ventes totales'])
    ax.set_xlabel('Année de sortie')
    ax.set_ylabel('Ventes (en millions)')

    # 4. Total sales for all games, one line per genre
    df = pandas.read_sql("""SELECT annee_sortie, genre, SUM(ventes_total) ventes_totales FROM plain_jeu
                            WHERE annee_sortie IS NOT NULL
                            GROUP BY annee_sortie, genre
                            ORDER BY annee_sortie;""", con=connection)
    fig, ax = plt.subplots()
    palette = plt.cm.tab20.colors
    # Group by a plain column name: a one-element list makes recent pandas
    # versions yield 1-tuples as labels, which would end up in the legend.
    for i, (label, group) in enumerate(df.groupby('genre')):
        # Wrap around the palette so more genres than colours cannot raise.
        group.plot(ax=ax, kind='line', x='annee_sortie',
                   y='ventes_totales', label=label,
                   color=palette[i % len(palette)])
    ax.set_xlabel('Année de sortie')
    ax.set_ylabel('Ventes (en millions)')
|
||||
|
||||
|
||||
##########
|
||||
# Partie 3
|
||||
##########
|
||||
|
||||
def part3_graphs(connection: "psycopg2.extensions.connection"):
    """Plot sales per platform read from the ``plain_jeu`` table.

    Four figures are created: European sales per platform as a line and as a
    bar chart, then the sales of every region per platform as styled lines and
    as grouped bars. Nothing is displayed here; the caller is expected to call
    ``plt.show()``.

    The annotation is quoted because psycopg2 documents the connection class
    as ``psycopg2.extensions.connection``; ``psycopg2.connection`` is only
    known to the type stubs.

    :param connection: open psycopg2 connection used to run the queries.
    """
    # 1. Total European sales per platform
    df = pandas.read_sql("""SELECT plateforme, SUM(ventes_ue) ventes_ue FROM plain_jeu
                            GROUP BY plateforme
                            ORDER BY ventes_ue;""", con=connection)

    def plot_ventes_ue(df: pandas.DataFrame, plot_kind: str = 'line'):
        """Draw the European sales per platform with the given plot kind."""
        ax = df.plot(x='plateforme', y='ventes_ue', legend=False, kind=plot_kind)
        ax.set_xlabel('Plateforme')
        ax.set_ylabel('Ventes en Europe (en millions)')
        # Force every platform label to be shown on the x axis.
        ax.set_xticks(np.arange(len(df)))
        ax.set_xticklabels(df['plateforme'])
        ax.set_title('Ventes de jeux vidéos en Europe par plateforme')

    # 2. Display as a curve
    plot_ventes_ue(df)

    # 3. Display as a bar chart
    plot_ventes_ue(df, 'bar')

    # 4. Total sales per platform and per geographical area
    df = pandas.read_sql("""SELECT plateforme, SUM(ventes_na) ventes_na, SUM(ventes_ue) ventes_ue, SUM(ventes_jp) ventes_jp, SUM(ventes_autre) ventes_autre FROM plain_jeu
                            GROUP BY plateforme
                            ORDER BY SUM(ventes_total);""", con=connection)

    # 5/6. Display as curves, one line style per region
    ax = df.plot(x='plateforme', style=['r*-', 'bo--', 'y^:', 'gs-.'])
    ax.set_xticks(np.arange(len(df['plateforme'])))
    ax.set_xticklabels(df['plateforme'])
    ax.set_xlabel('Plateforme')
    ax.set_ylabel('Ventes (en millions)')

    # Same data as grouped bars
    ax = df.plot.bar(x='plateforme',
                     y=['ventes_na', 'ventes_ue', 'ventes_jp', 'ventes_autre'])
    ax.set_xlabel('Plateforme')
    ax.set_ylabel('Ventes (en millions)')
|
||||
|
||||
|
||||
##########
|
||||
# Partie 4
|
||||
##########
|
||||
|
||||
def plot_ventes_jeu(cur: "psycopg2.extensions.cursor", nom: str, pourcentages: bool = True):
    """Plot the regional sales of one game as a bar chart and a pie chart.

    The game is looked up by name in ``plain_jeu``. Only the first row
    returned by the query is used, so a title present on several platforms is
    represented by a single one of its rows.

    The annotation is quoted because psycopg2 documents the cursor class as
    ``psycopg2.extensions.cursor``; ``psycopg2.cursor`` is only known to the
    type stubs.

    :param cur: open psycopg2 cursor used to run the query.
    :param nom: exact name of the game (column ``nom``).
    :param pourcentages: when true, the pie chart shows percentages on the
        wedges; otherwise it is drawn without legend.
    :raises ValueError: if no row of ``plain_jeu`` has this name.
    """
    # 1. Fetch the sales. The name is passed as a bound parameter so that
    # quotes inside it can neither break nor alter the statement.
    cur.execute("""SELECT ventes_na, ventes_ue, ventes_jp, ventes_autre FROM plain_jeu
                   WHERE nom = %s;""", (nom,))
    data = cur.fetchone()
    if data is None:
        raise ValueError('Aucun jeu nommé {!r} dans plain_jeu'.format(nom))

    # Column names look like "ventes_<zone>"; keep the zone part as label.
    zones = [desc[0].split('_')[1] for desc in cur.description]
    # Reorganise the single result row into one DataFrame row per zone.
    df = pandas.DataFrame([[zone, float(value)] for zone, value in zip(zones, data)],
                          columns=['zone', 'ventes'], index=zones)

    title = 'Ventes du jeu ' + nom + ' par zone géographique'

    # 2. Bar chart
    ax = df.plot.bar(x='zone', y='ventes', legend=False, rot=0)
    ax.set_title(title)

    if pourcentages:
        # 4. Pie chart with percentages
        ax = df.plot.pie(y='ventes', labels=None, autopct='%1.1f%%')
    else:
        # 3. Pie chart
        ax = df.plot.pie(y='ventes', legend=False)
    ax.set_title(title)
    ax.set_xlabel('Zone géographique')
    ax.set_ylabel('Ventes (en millions)')
|
||||
|
||||
|
||||
def part4_graphs(connection: "psycopg2.extensions.connection"):
    """Plot the regional sales of 'Mario Kart 64' and 'Mario Kart Wii'.

    Both games are drawn through ``plot_ventes_jeu`` using one cursor that is
    closed when the function returns.

    The annotation is quoted because psycopg2 documents the connection class
    as ``psycopg2.extensions.connection``; ``psycopg2.connection`` is only
    known to the type stubs.

    :param connection: open psycopg2 connection providing the cursor.
    """
    with connection.cursor() as cur:
        plot_ventes_jeu(cur, 'Mario Kart 64')
        # 5. Results for Mario Kart Wii
        plot_ventes_jeu(cur, 'Mario Kart Wii')
|
||||
|
||||
|
||||
##########
|
||||
# Partie 5
|
||||
##########
|
||||
|
||||
def part5_graphs(connection: "psycopg2.extensions.connection"):
    """Plot the share of sales per genre, overall and per release period.

    Two figures are created: one pie chart of the total sales per genre, and a
    grid of pie charts (one per release period) sharing a single legend.
    Nothing is displayed here; the caller is expected to call ``plt.show()``.

    The annotation is quoted because psycopg2 documents the connection class
    as ``psycopg2.extensions.connection``; ``psycopg2.connection`` is only
    known to the type stubs.

    :param connection: open psycopg2 connection used to run the queries.
    """
    palette = plt.cm.tab20.colors

    # 1/2. Share of the total sales per genre
    df = pandas.read_sql("""SELECT genre, SUM(ventes_total) total_ventes FROM plain_jeu
                            GROUP BY genre;""", con=connection)
    df.set_index('genre', inplace=True)

    # 3. Pie chart
    ax = df.plot.pie(y='total_ventes', legend=False, autopct='%1.1f%%', colors=palette)
    ax.set_title('Ventes mondiales de jeux vidéos par genre')
    ax.set_ylabel('')

    # 4. One pie chart of the total sales per genre for each release period
    df = pandas.read_sql("""
        SELECT CASE WHEN annee_sortie < 1990 THEN '< 1990'
                    WHEN annee_sortie BETWEEN 1990 AND 1999 THEN '<=> 1990-1999'
                    WHEN annee_sortie BETWEEN 2000 AND 2009 THEN '<=> 2000-2009'
                    WHEN annee_sortie >= 2010 THEN '>= 2010'
               END annee,
               genre,
               SUM(ventes_total) ventes_totales
        FROM plain_jeu
        WHERE annee_sortie IS NOT NULL
        GROUP BY annee, genre
        ORDER BY annee, genre;""", con=connection)

    # Every pie uses the same ordered genre list so that a given colour always
    # designates the same genre, even when a period lacks some genres.
    genres = sorted(df['genre'].unique())
    grouped = df.groupby('annee')

    # Two rows; round the column count up so an odd number of periods still
    # fits, and keep at least one column so subplots() never gets ncols=0.
    ncols = max(1, (grouped.ngroups + 1) // 2)
    fig, axs = plt.subplots(figsize=(9, 4), nrows=2, ncols=ncols, squeeze=False)
    axes = axs.flatten()

    for (key, group), ax in zip(grouped, axes):
        # Genres without sales in this period get an empty (zero) wedge.
        ventes = group.set_index('genre')[['ventes_totales']].reindex(genres, fill_value=0)
        ventes.plot.pie(ax=ax, y='ventes_totales', labels=None,
                        legend=False, colors=palette)
        ax.set_ylabel('')
        ax.set_title(key)

    # Hide the grid cells that did not receive a pie.
    for ax in axes[grouped.ngroups:]:
        ax.set_axis_off()

    fig.legend(genres)
    fig.suptitle('Ventes de jeux vidéos par genre et par période')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Each prompt falls back to its default when the answer is left empty.
    db_host = input('Nom d\'hôte : ') or 'berlin'
    db_name = input('Nom de la base de données : ') or 'dbclfreville2'
    db_user = input('Utilisateur : ') or 'clfreville2'
    db_password = getpass('Mot de passe : ')

    connection = psycopg2.connect(host=db_host, port=5432, database=db_name, user=db_user, password=db_password)
    try:
        # Left disabled as in the original; presumably run once to create and
        # populate plain_jeu -- confirm before enabling.
        # part1_create_table(connection, 'vgsales.csv')
        part2_graphs(connection)
        part3_graphs(connection)
        part4_graphs(connection)
        part5_graphs(connection)
    finally:
        # Release the connection even if one of the parts raised.
        connection.close()

    # The figures are built from data already fetched, so they can be shown
    # after the connection is closed.
    plt.show()
|
@ -0,0 +1,93 @@
|
||||
import pandas as pd
|
||||
import psycopg2 as psy
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from getpass import getpass
|
||||
|
||||
# Load the raw sales data; read_csv already returns a DataFrame, so no extra
# pd.DataFrame(...) wrapper is needed.
data = pd.read_csv(r'vgsales.csv')
df = data

co = None
try:
    co = psy.connect(host='londres',
                     database='dbanperederi',
                     user='anperederi',
                     password=getpass())

    # The context manager closes the cursor even if one of the statements fails.
    with co.cursor() as cur:
        # 2. (Re)create the table from scratch on every run. The column list
        # must not end with a comma, otherwise PostgreSQL rejects the statement.
        cur.execute('''DROP TABLE IF EXISTS Formule ;''')
        cur.execute('''CREATE TABLE Formule (
            Name varchar(150) NOT NULL,
            Platform varchar NOT NULL,
            Year numeric NOT NULL,
            Genre varchar NOT NULL,
            Publisher varchar NOT NULL,
            NA_Sales numeric NOT NULL,
            EU_Sales numeric NOT NULL,
            JP_Sales numeric NOT NULL,
            Other_Sales numeric NOT NULL,
            Global_Sales numeric NOT NULL
        );''')

        # 3. Remove duplicates BEFORE inserting, so that the table only
        # receives one row per (Name, Platform) pair.
        df = data.drop_duplicates(subset=['Name', 'Platform'])

        cur.executemany(
            '''INSERT INTO Formule VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);''',
            [(row.Name, row.Platform, row.Year, row.Genre, row.Publisher,
              row.NA_Sales, row.EU_Sales, row.JP_Sales,
              row.Other_Sales, row.Global_Sales)
             for row in df.itertuples()])

        # 5. Look for inconsistent rows: the regional sales should add up to
        # the global sales (compared with a tolerance of 0.1).
        cur.execute("""SELECT Name, Platform, ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2), Global_Sales
                       FROM Formule
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1""")
        for row in cur.fetchall():
            print(row)

        # 6. Recompute the global sales of the inconsistent rows.
        cur.execute("""UPDATE Formule
                       SET Global_Sales = ROUND((NA_Sales + EU_Sales + JP_Sales + Other_Sales)::numeric, 2)
                       WHERE ABS(NA_Sales + EU_Sales + JP_Sales + Other_Sales - Global_Sales) > 0.1;""")

    co.commit()

    # TODO: step 7 (clean DataFrame read back from Formule) and the graphs of
    # the following parts were only present as commented-out drafts.

except Exception as error:
    # psy.DatabaseError is a subclass of Exception, so one clause covers both.
    print(error)
finally:
    # Always release the connection, whether or not the script succeeded.
    if co is not None:
        co.close()
|
@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import psycopg2 as psy
|
||||
from getpass import getpass
|
||||
|
||||
# Load the video-game sales CSV and echo it on standard output.
data = pd.read_csv(r'vgsales.csv')
df = pd.DataFrame(data)
print(df)

co = None
try:
    co = psy.connect(host='londres',
                     database='dbanperederi',
                     user='anperederi',
                     password=getpass())

    # The context manager closes the cursor even if the query fails.
    with co.cursor() as curs:
        # Sites whose postal code starts with 63. No parameters are passed to
        # execute(), so the literal % needs no escaping.
        curs.execute('''SELECT *
                        FROM Site
                        WHERE codePostal LIKE '63%';''')
        res = curs.fetchall()

    print(res)

except Exception as error:
    # psy.DatabaseError is a subclass of Exception, so one clause covers both.
    print(error)
    # exit() is the interactive helper injected by the site module and would
    # report success; leave with a non-zero status instead. The finally
    # clause below still runs.
    raise SystemExit(1)
# Always close the connection once it is no longer needed.
finally:
    if co is not None:
        co.close()
|
Loading…
Reference in new issue