parent
2c1551588f
commit
565aaa4615
@ -1,132 +0,0 @@
|
||||
#sae2.04
|
||||
|
||||
import pandas as pd
|
||||
import getpass
|
||||
import matplotlib.pyplot as plt
|
||||
from sqlalchemy import create_engine, exc, text
|
||||
|
||||
# Load the raw Spotify export, then print the frame and its column labels so
# the schema can be checked by eye.
# NOTE(review): latin-1 decodes any byte sequence without raising, so a UTF-8
# file would load with garbled accented characters -- confirm the file's
# actual encoding.
df = pd.read_csv('spotify_songs.csv', encoding="latin-1")
print(df)
print(df.columns)
# The column listing above returns:
# ['track_id', 'track_name', 'track_artist', 'track_popularity',
#  'track_album_id', 'track_album_name', 'track_album_release_date',
#  'playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre',
#  'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
#  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
#  'duration_ms']
|
||||
# 'key', 'mode' and 'instrumentalness' are not needed here, so they are
# removed; rows with any missing value are discarded as well.
df = df.drop(columns=['key', 'mode', 'instrumentalness']).dropna()

print(df['duration_ms'])

# Durations are turned into timedeltas built from the millisecond counts
# (instead of being divided down to seconds or minutes), and the column is
# renamed to 'duration_m'.
df = df.assign(
    duration_ms=pd.to_timedelta(df['duration_ms'], unit='ms'),
).rename(columns={"duration_ms": "duration_m"})
print(df['duration_m'])

# NOTE(review): removes the row labelled 151; the reason is not recorded
# here -- confirm why this particular row has to go.
df = df.drop(151)
|
||||
|
||||
# Build one frame per database table. Each keyed frame keeps only the first
# row seen for its primary-key column. `drop_duplicates(subset=...)` states
# that directly, instead of assigning a de-duplicated key Series back (which
# fills the duplicate rows with NaN) and then calling `dropna()`. The result
# is the same because `df` has already had its missing-value rows removed.
dfT = df[[
    'track_id', 'track_name', 'track_popularity', 'duration_m',
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'liveness', 'valence', 'tempo', 'track_artist', 'track_album_id',
    'playlist_id',
]].drop_duplicates(subset='track_id')

dfA = df[[
    'track_album_id', 'track_album_name', 'track_album_release_date',
    'track_artist',
]].drop_duplicates(subset='track_album_id')

dfP = df[[
    'playlist_id', 'playlist_name', 'playlist_genre', 'playlist_subgenre',
]].drop_duplicates(subset='playlist_id')

# The artist name is the whole row, so a plain de-duplication is enough.
dfArtist = df[['track_artist']].drop_duplicates()
|
||||
|
||||
|
||||
|
||||
def _insert_rows(connection, statement, frame):
    """Insert every row of *frame* through *statement* in a single call.

    The bind names used in *statement* must be the column labels of *frame*.
    An empty frame issues no statement at all, exactly as a row-by-row loop
    over it would.
    """
    rows = frame.to_dict(orient="records")
    if rows:
        # A list of parameter dictionaries makes SQLAlchemy run the statement
        # once per dictionary ("executemany").
        connection.execute(statement, rows)


# NOTE(review): the password is hard-coded in the URL ("achanger" looks like
# a placeholder) while the imported `getpass` module is never used -- consider
# prompting for it instead of keeping a credential in the source.
engine = create_engine("postgresql://reneveu:achanger@londres/dbreneveu")

try:
    # The context manager closes the connection on every exit path.
    with engine.connect() as co:
        # --- Schema: each table is dropped and recreated, parents first so
        # --- that the REFERENCES clauses can resolve.
        co.execute(text('''DROP TABLE IF EXISTS Artist CASCADE;'''))
        co.execute(text('''CREATE TABLE Artist(
            track_artist varchar(150),
            PRIMARY KEY (track_artist)
        );'''))

        co.execute(text('''DROP TABLE IF EXISTS Album CASCADE;'''))
        co.execute(text('''CREATE TABLE Album(
            track_album_id varchar(150),
            track_album_name varchar(500),
            track_album_release_date varchar(15),
            track_artist varchar(150) REFERENCES Artist,
            PRIMARY KEY (track_album_id)
        );'''))

        co.execute(text('''DROP TABLE IF EXISTS Playlist CASCADE;'''))
        co.execute(text('''CREATE TABLE Playlist(
            playlist_id varchar(150) PRIMARY KEY,
            playlist_name varchar(150),
            playlist_genre varchar(50),
            playlist_subgenre varchar(150)
        );'''))

        co.execute(text('''DROP TABLE IF EXISTS Track CASCADE;'''))
        co.execute(text('''CREATE TABLE Track(
            track_id varchar(150),
            track_name varchar(150),
            track_popularity numeric,
            duration_m time,
            danceability numeric,
            energy numeric,
            loudness numeric,
            speechiness numeric,
            acousticness numeric,
            liveness numeric,
            valence numeric,
            tempo numeric,
            track_artist varchar(150) REFERENCES Artist,
            track_album_id varchar(150) REFERENCES Album,
            playlist_id varchar(150) REFERENCES Playlist,
            PRIMARY KEY (track_id)
        );'''))

        # --- Data: referenced tables are filled before the tables that
        # --- point at them; each table is committed once it is loaded.
        _insert_rows(
            co,
            text('''INSERT INTO Artist VALUES(:track_artist);'''),
            dfArtist,
        )
        co.commit()

        _insert_rows(
            co,
            text('''INSERT INTO Album VALUES(
                :track_album_id, :track_album_name,
                :track_album_release_date, :track_artist);'''),
            dfA,
        )
        co.commit()

        _insert_rows(
            co,
            text('''INSERT INTO Playlist VALUES(
                :playlist_id, :playlist_name, :playlist_genre,
                :playlist_subgenre);'''),
            dfP,
        )
        co.commit()

        _insert_rows(
            co,
            text('''INSERT INTO Track VALUES(
                :track_id, :track_name, :track_popularity, :duration_m,
                :danceability, :energy, :loudness, :speechiness,
                :acousticness, :liveness, :valence, :tempo, :track_artist,
                :track_album_id, :playlist_id);'''),
            dfT,
        )
        co.commit()
except exc.SQLAlchemyError as e:
    # Database errors are printed and the script carries on.
    print(e)
|
||||
|
||||
|
||||
# Pie chart of how many rows fall under each playlist genre, with every
# slice labelled by its percentage and no y-axis label.
genre_counts = df['playlist_genre'].value_counts()
genre_counts.plot.pie(ylabel='', autopct='%1.1f%%')
plt.show()
|
Loading…
Reference in new issue