From 565aaa4615f648f446ca1602e84c14ff7021ba18 Mon Sep 17 00:00:00 2001 From: Remi NEVEU Date: Sat, 4 May 2024 11:08:08 +0200 Subject: [PATCH] =?UTF-8?q?Supprimer=20'test=5FR=C3=A9mi'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test_Rémi | 132 ----------------------------------------------------- 1 file changed, 132 deletions(-) delete mode 100644 test_Rémi diff --git a/test_Rémi b/test_Rémi deleted file mode 100644 index b570396..0000000 --- a/test_Rémi +++ /dev/null @@ -1,132 +0,0 @@ -#sae2.04 - -import pandas as pd -import getpass -import matplotlib.pyplot as plt -from sqlalchemy import create_engine, exc, text - -df = pd.read_csv('spotify_songs.csv', sep=',', encoding="latin-1") -print(df) -print(df.columns) -''' -renvoie ['track_id', 'track_name', 'track_artist', 'track_popularity', - 'track_album_id', 'track_album_name', 'track_album_release_date', - 'playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre', - 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', - 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', - 'duration_ms'] -''' -df = df.drop(columns=['key','mode','instrumentalness']) -# Ces colonnes sont inutiles ici -# Au besoin, on peut drop -df = df.dropna() - -print(df['duration_ms']) -# df['duration_ms'] = df['duration_ms']/1000 #ms en s -# df['duration_ms'] = df['duration_ms']/60 #s en m - -df['duration_ms'] = pd.to_timedelta(df['duration_ms'], unit='ms') -df = df.rename(columns={"duration_ms": "duration_m"}) -print(df['duration_m']) - -df = df.drop(151) - -dfT = df[['track_id', 'track_name', 'track_popularity', 'duration_m', 'danceability', 'energy', 'loudness','speechiness', 'acousticness', 'liveness', 'valence', 'tempo', 'track_artist', 'track_album_id','playlist_id']] -dfA = df[['track_album_id', 'track_album_name', 'track_album_release_date', 'track_artist']] -dfP = df[['playlist_id', 'playlist_name', 'playlist_genre', 'playlist_subgenre']] -dfArtist = df[['track_artist']] -dfArtist = dfArtist.drop_duplicates() -dfA = dfA.drop_duplicates() -dfA['track_album_id'] = dfA['track_album_id'].drop_duplicates() -dfA = dfA.dropna() -dfP = dfP.drop_duplicates() -dfP['playlist_id'] = dfP['playlist_id'].drop_duplicates() -dfP = dfP.dropna() -dfT = dfT.drop_duplicates() -dfT['track_id'] = dfT['track_id'].drop_duplicates() -dfT = dfT.dropna() - - - -co = None -engine = create_engine("postgresql://reneveu:achanger@londres/dbreneveu") -try : - co = engine.connect() - co.execute(text('''DROP TABLE IF EXISTS Artist CASCADE;''')) - co.execute(text('''CREATE TABLE Artist( - track_artist varchar(150), - PRIMARY KEY (track_artist) - );''')) - - co.execute(text('''DROP TABLE IF EXISTS Album CASCADE;''')) - co.execute(text('''CREATE TABLE Album( - track_album_id varchar(150), - track_album_name varchar(500), - track_album_release_date varchar(15), - track_artist varchar(150) REFERENCES Artist, - PRIMARY KEY (track_album_id) - );''')) - - co.execute(text('''DROP TABLE IF EXISTS Playlist CASCADE;''')) - co.execute(text('''CREATE TABLE Playlist( - playlist_id varchar(150) PRIMARY KEY, - playlist_name varchar(150), - playlist_genre varchar(50), - playlist_subgenre varchar(150) - );''')) - - co.execute(text('''DROP TABLE IF EXISTS Track CASCADE;''')) - co.execute(text('''CREATE TABLE Track( - track_id varchar(150), - track_name varchar(150), - track_popularity numeric, - duration_m time, - danceability numeric, - energy numeric, - loudness numeric, - speechiness numeric, - acousticness numeric, - liveness numeric, - valence numeric, - tempo numeric, - track_artist varchar(150) REFERENCES Artist, - track_album_id varchar(150) REFERENCES Album, - playlist_id varchar(150) REFERENCES Playlist, - PRIMARY KEY (track_id) - );''')) - - for row in dfArtist.itertuples(): - co.execute(text('''INSERT INTO Artist VALUES(:1);'''), - {'1': row.track_artist}) - co.execute(text('''SELECT * FROM Artist;''')) - co.commit() - - for row in dfA.itertuples(): - co.execute(text('''INSERT INTO Album VALUES(:1, :2, :3, :4);'''), - {'1': row.track_album_id, '2': row.track_album_name, '3': row.track_album_release_date, '4': row.track_artist}) - co.execute(text('''SELECT * FROM Album;''')) - co.commit() - - for row in dfP.itertuples(): - co.execute(text('''INSERT INTO Playlist VALUES(:1, :2, :3, :4);'''), - {'1': row.playlist_id, '2': row.playlist_name, '3': row.playlist_genre, '4': row.playlist_subgenre}) - co.execute(text('''SELECT * FROM Playlist;''')) - co.commit() - - for row in dfT.itertuples(): - co.execute(text('''INSERT INTO Track VALUES(:1, :2, :3, :4, :5, :6, :7, :8, :9, :10, :11, :12, :13, :14, :15);'''), - {'1': row.track_id, '2': row.track_name, '3': row.track_popularity, '4': row.duration_m, '5': row.danceability, '6': row.energy, '7': row.loudness, - '8': row.speechiness, '9': row.acousticness, '10': row.liveness, '11': row.valence, '12': row.tempo, '13': row.track_artist, '14': row.track_album_id, - '15': row.playlist_id}) - co.execute(text('''SELECT * FROM Track;''')) - co.commit() - -except exc.SQLAlchemyError as e: - print(e) -finally : - if co is not None: - co.close() - - -df['playlist_genre'].value_counts().plot.pie(ylabel='', autopct='%1.1f%%') -plt.show() \ No newline at end of file