#sae2.04

import getpass

import pandas as pd
from sqlalchemy import create_engine, exc, text
# Location of the raw dataset, resolved against the current working directory.
CSV_PATH = 'spotify_songs.csv'

# Columns that are not needed here; they are removed before anything else.
UNUSED_COLUMNS = ['key', 'mode', 'instrumentalness']

# Column subsets used to project the cleaned dataset into separate frames.
TRACK_COLUMNS = ['track_id', 'track_name', 'track_artist', 'track_popularity', 'duration_m']
ALBUM_COLUMNS = ['track_album_id', 'track_album_name', 'track_artist', 'track_album_release_date']
PLAYLIST_COLUMNS = ['playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre']
FEATURE_COLUMNS = ['track_id', 'track_album_id', 'danceability', 'energy', 'loudness',
                   'speechiness', 'acousticness', 'liveness', 'valence', 'tempo']


def drop_unused_and_missing(songs):
    """Return *songs* without the unused columns and without incomplete rows.

    Args:
        songs: DataFrame containing at least every column of ``UNUSED_COLUMNS``.

    Returns:
        A new DataFrame in which the ``UNUSED_COLUMNS`` are removed and every
        row that still holds a missing value is dropped. The input frame is
        not modified.

    Raises:
        KeyError: if one of ``UNUSED_COLUMNS`` is absent from *songs*.
    """
    return songs.drop(columns=UNUSED_COLUMNS).dropna()


def duration_to_minutes(songs):
    """Return *songs* with ``duration_ms`` replaced by ``duration_m``.

    The new column holds the duration in minutes rounded to two decimals and
    keeps the position of the column it replaces.

    Args:
        songs: DataFrame with a numeric ``duration_ms`` column.

    Returns:
        A new DataFrame; the input frame is not modified.

    Raises:
        KeyError: if *songs* has no ``duration_ms`` column.
    """
    converted = songs.rename(columns={"duration_ms": "duration_m"})
    # ms -> s -> min, written as two divisions so the floating-point result is
    # the same as dividing by 1000 and then by 60 in separate statements.
    converted['duration_m'] = (converted['duration_m'] / 1000 / 60).round(2)
    return converted


def split_tables(songs):
    """Project *songs* onto the track, album, playlist and feature columns.

    Args:
        songs: DataFrame containing every column named in ``TRACK_COLUMNS``,
            ``ALBUM_COLUMNS``, ``PLAYLIST_COLUMNS`` and ``FEATURE_COLUMNS``.

    Returns:
        A 4-tuple ``(tracks, albums, playlists, features)`` of DataFrames.
        Each one is a plain column selection, so it has exactly as many rows
        as *songs*.

    Raises:
        KeyError: if a required column is missing.
    """
    return (
        songs[TRACK_COLUMNS],
        songs[ALBUM_COLUMNS],
        songs[PLAYLIST_COLUMNS],
        songs[FEATURE_COLUMNS],
    )


if __name__ == "__main__":
    # The assignments below stay at module scope (no main() wrapper) so that
    # df, dfT, dfA, dfP and dfAutre remain globals when the file is run as a
    # script, exactly as before.
    df = pd.read_csv(CSV_PATH, sep=',', encoding="latin-1")
    print(df)
    print(df.columns)
    # df.columns returns:
    # ['track_id', 'track_name', 'track_artist', 'track_popularity',
    #  'track_album_id', 'track_album_name', 'track_album_release_date',
    #  'playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre',
    #  'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    #  'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    #  'duration_ms']

    df = drop_unused_and_missing(df)

    print(df['duration_ms'])
    df = duration_to_minutes(df)
    print(df['duration_m'])

    # Original author's note: a full-row df.drop_duplicates() was considered
    # and left out because no title appears twice.

    dfT, dfA, dfP, dfAutre = split_tables(df)
    # NOTE(review): dfA and dfP are column projections with one row per row of
    # df; de-duplicate them before loading into tables keyed on the album or
    # playlist id.

    # ------------------------------------------------------------------
    # Disabled database-load template (was kept inside a string literal).
    # NOTE(review): it selects "artist_name" and "released_year", which are
    # not among the columns listed above, so it must be adapted before being
    # enabled. The connection URL embeds a placeholder password; presumably
    # the password is meant to be prompted for (getpass is imported) - confirm.
    # The indentation was lost in the original and is reconstructed here;
    # verify the loop body before enabling.
    #
    # co = None
    # engine = create_engine("postgresql://reneveu:achanger@londres/dbreneveu")
    # try:
    #     co = engine.connect()
    #     dfT = df[["track_name", "artist_name", "released_year"]]
    #     dfT = dfT.drop_duplicates()
    #     co.execute(text('''DROP TABLE IF EXISTS Test CASCADE;'''))
    #     co.execute(text('''CREATE TABLE Test(
    #                     track_name varchar(150),
    #                     artist_name varchar(150),
    #                     released_year numeric(4),
    #                     PRIMARY KEY (track_name, artist_name)
    #                     );'''))
    #
    #     for row in dfT.itertuples():
    #         co.execute(text('''INSERT INTO Test VALUES(:1, :2, :3);'''),
    #                    {'1': row.track_name, '2': row.artist_name, '3': row.released_year})
    #     co.execute(text('''SELECT * FROM Test;'''))
    #     co.commit()
    #
    # except exc.SQLAlchemyError as e:
    #     print(e)
    # finally:
    #     if co is not None:
    #         co.close()
    # ------------------------------------------------------------------