#sae2.04
"""Load the Spotify songs CSV, split it into four tables and push them to PostgreSQL.

Steps performed when the script runs:

1. Read ``spotify_songs.csv`` and drop the columns/rows that are not used.
2. Split the flat data into Artist, Album, Playlist and Track frames, each
   de-duplicated on the column used as its primary key.
3. Recreate the four tables in PostgreSQL and bulk-insert the frames.
4. Display a pie chart of the playlist genre distribution.
"""
import getpass

import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine, exc, text
from sqlalchemy.engine import URL

CSV_PATH = 'spotify_songs.csv'

# Connection settings; the password is asked at run time instead of being
# stored in the source file.
DB_USER = "reneveu"
DB_HOST = "londres"
DB_NAME = "dbreneveu"

# Column order of each frame matches the column order of its CREATE TABLE.
TRACK_COLUMNS = [
    'track_id', 'track_name', 'track_popularity', 'duration_m',
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'liveness', 'valence', 'tempo', 'track_artist', 'track_album_id',
    'playlist_id',
]
ALBUM_COLUMNS = [
    'track_album_id', 'track_album_name', 'track_album_release_date',
    'track_artist',
]
PLAYLIST_COLUMNS = [
    'playlist_id', 'playlist_name', 'playlist_genre', 'playlist_subgenre',
]
ARTIST_COLUMNS = ['track_artist']

# Executed in order: referenced tables (Artist, Album, Playlist) are created
# before Track, which holds foreign keys to all three.
SCHEMA_STATEMENTS = (
    '''DROP TABLE IF EXISTS Artist CASCADE;''',
    '''CREATE TABLE Artist(
        track_artist varchar(150),
        PRIMARY KEY (track_artist)
    );''',
    '''DROP TABLE IF EXISTS Album CASCADE;''',
    '''CREATE TABLE Album(
        track_album_id varchar(150),
        track_album_name varchar(500),
        track_album_release_date varchar(15),
        track_artist varchar(150) REFERENCES Artist,
        PRIMARY KEY (track_album_id)
    );''',
    '''DROP TABLE IF EXISTS Playlist CASCADE;''',
    '''CREATE TABLE Playlist(
        playlist_id varchar(150) PRIMARY KEY,
        playlist_name varchar(150),
        playlist_genre varchar(50),
        playlist_subgenre varchar(150)
    );''',
    '''DROP TABLE IF EXISTS Track CASCADE;''',
    '''CREATE TABLE Track(
        track_id varchar(150),
        track_name varchar(150),
        track_popularity numeric,
        duration_m time,
        danceability numeric,
        energy numeric,
        loudness numeric,
        speechiness numeric,
        acousticness numeric,
        liveness numeric,
        valence numeric,
        tempo numeric,
        track_artist varchar(150) REFERENCES Artist,
        track_album_id varchar(150) REFERENCES Album,
        playlist_id varchar(150) REFERENCES Playlist,
        PRIMARY KEY (track_id)
    );''',
)


def _insert_frame(connection, table, frame):
    """Insert every row of ``frame`` into ``table`` with one executemany call.

    Args:
        connection: open SQLAlchemy connection used to run the INSERT.
        table: name of the target table (a constant of this script, never
            user input, so it is safe to interpolate into the statement).
        frame: DataFrame whose column names are the target column names.
    """
    # itertuples() yields plain Python scalars, the same values the
    # row-by-row inserts used to send.
    rows = [row._asdict() for row in frame.itertuples(index=False)]
    if not rows:
        # Nothing to insert; skip the call rather than send an empty batch.
        return
    column_list = ", ".join(frame.columns)
    placeholders = ", ".join(f":{name}" for name in frame.columns)
    statement = text(
        f"INSERT INTO {table} ({column_list}) VALUES ({placeholders})"
    )
    # A list of dicts makes SQLAlchemy run the statement once per dict.
    connection.execute(statement, rows)


# NOTE(review): the file is decoded as latin-1; confirm this matches the
# actual encoding of the CSV.
df = pd.read_csv(CSV_PATH, sep=',', encoding="latin-1")
print(df)
print(df.columns)
# The columns printed above are:
# ['track_id', 'track_name', 'track_artist', 'track_popularity',
#  'track_album_id', 'track_album_name', 'track_album_release_date',
#  'playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre',
#  'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
#  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
#  'duration_ms']

# These columns are not needed here.
df = df.drop(columns=['key', 'mode', 'instrumentalness'])

# Remove every row that has at least one missing value.
df = df.dropna()

print(df['duration_ms'])
# Store the duration as a timedelta (inserted into a SQL ``time`` column).
df['duration_ms'] = pd.to_timedelta(df['duration_ms'], unit='ms')
df = df.rename(columns={"duration_ms": "duration_m"})
print(df['duration_m'])

# NOTE(review): the row labelled 151 is removed without explanation in the
# original script; presumably it is a problematic record - confirm.
df = df.drop(151)

# One frame per table, keeping only the first row seen for each key value.
# For Track this means a track listed in several playlists keeps the
# playlist of its first row only.
dfArtist = df[ARTIST_COLUMNS].drop_duplicates()
dfA = df[ALBUM_COLUMNS].drop_duplicates(subset='track_album_id')
dfP = df[PLAYLIST_COLUMNS].drop_duplicates(subset='playlist_id')
dfT = df[TRACK_COLUMNS].drop_duplicates(subset='track_id')

# URL.create escapes any special character contained in the password.
engine = create_engine(
    URL.create(
        "postgresql",
        username=DB_USER,
        password=getpass.getpass(f"PostgreSQL password for {DB_USER}: "),
        host=DB_HOST,
        database=DB_NAME,
    )
)

try:
    # The context manager closes the connection on success and on error.
    with engine.connect() as co:
        for statement in SCHEMA_STATEMENTS:
            co.execute(text(statement))

        # Parents first so that the foreign keys of Album and Track resolve.
        # Each table is committed as soon as it is fully loaded.
        for table, frame in (
            ("Artist", dfArtist),
            ("Album", dfA),
            ("Playlist", dfP),
            ("Track", dfT),
        ):
            _insert_frame(co, table, frame)
            co.commit()
except exc.SQLAlchemyError as e:
    # Database errors are reported but do not prevent the chart below.
    print(e)

# Share of each playlist genre among the cleaned rows.
df['playlist_genre'].value_counts().plot.pie(ylabel='', autopct='%1.1f%%')
plt.show()