"""Load US unemployment rates from CSV into PostgreSQL and plot their evolution."""
from calendar import month_abbr
from getpass import getpass
from typing import Dict

import matplotlib.pyplot as plt
import pandas
import psycopg2
import psycopg2.extensions
# pip3 install types-psycopg2

# Map abbreviated month names ('Jan', 'Feb', ...) to their month number.
# calendar.month_abbr[0] is an empty placeholder, so '' maps to 0.
month_to_int: Dict[str, int] = {
    abbr: number for number, abbr in enumerate(month_abbr)
}

# CSV columns holding the unemployment rates, in the same order as the
# corresponding table columns listed in _INSERT_SQL.
_CSV_RATE_COLUMNS = (
    'Primary_School', 'High_School', 'Associates_Degree',
    'Professional_Degree', 'White', 'Black', 'Asian', 'Hispanic',
    'Men', 'Women',
)

# Columns are named explicitly so the statement does not depend on the
# physical column order of the table.
_INSERT_SQL = """INSERT INTO unemployment (
    year_month, primary_school, high_school, associates_degree,
    professional_degree, white, black, asian, hispanic, men, woman
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""


def _insert_rows(cur, filename: str) -> None:
    """Insert every complete row of the CSV *filename* using cursor *cur*.

    Rows containing a missing value are dropped before insertion. The caller
    owns the transaction: nothing is committed here.

    Raises:
        KeyError: if a ``Month`` value is not a key of ``month_to_int``.
    """
    df = pandas.read_csv(filename).dropna()
    params = []
    for row in df.itertuples():
        # int() guards against pandas having inferred Year as a float column
        # (str(2010.0) would give an invalid date literal).
        first_of_month = f'{int(row.Year)}-{month_to_int[row.Month]}-1'
        # float() turns numpy scalars (notably numpy.int64, which psycopg2
        # cannot adapt by default) into plain Python numbers.
        rates = tuple(float(getattr(row, col)) for col in _CSV_RATE_COLUMNS)
        params.append((first_of_month, *rates))
    cur.executemany(_INSERT_SQL, params)


def create_table(connection: psycopg2.extensions.connection, filename: str):
    """Create the ``unemployment`` table if needed and fill it from *filename*.

    The table creation and the inserts run in a single transaction, which is
    committed on success and rolled back if any statement fails.
    """
    # `with connection` commits or rolls back; it does not close the connection.
    with connection:
        with connection.cursor() as cur:
            cur.execute("""CREATE TABLE IF NOT EXISTS unemployment (
                year_month DATE PRIMARY KEY,
                primary_school NUMERIC(4, 2) NOT NULL CHECK (primary_school >= 0),
                high_school NUMERIC(4, 2) NOT NULL CHECK (high_school >= 0),
                associates_degree NUMERIC(4, 2) NOT NULL CHECK (associates_degree >= 0),
                professional_degree NUMERIC(4, 2) NOT NULL CHECK (professional_degree >= 0),
                white NUMERIC(4, 2) NOT NULL CHECK (white >= 0),
                black NUMERIC(4, 2) NOT NULL CHECK (black >= 0),
                asian NUMERIC(4, 2) NOT NULL CHECK (asian >= 0),
                hispanic NUMERIC(4, 2) NOT NULL CHECK (hispanic >= 0),
                men NUMERIC(4, 2) NOT NULL CHECK (men >= 0),
                woman NUMERIC(4, 2) NOT NULL CHECK (woman >= 0)
            );""")
            _insert_rows(cur, filename)


def append_data(connection: psycopg2.extensions.connection, filename: str):
    """Append the rows of the CSV *filename* to the existing ``unemployment`` table.

    The inserts run in one transaction, committed on success and rolled back
    on failure.
    """
    # Dataset reference left by the original author:
    # https://datahub.io/core/employment-us
    with connection:
        with connection.cursor() as cur:
            _insert_rows(cur, filename)


def graph_evolution(connection: psycopg2.extensions.connection):
    """Plot the unemployment series over time, one figure per column group.

    The figures are only created here; the caller is expected to display
    them (e.g. with ``plt.show()``).
    """
    df = pandas.read_sql("""SELECT * FROM unemployment ORDER BY year_month;""",
                         con=connection)
    column_groups = (
        ['primary_school', 'high_school', 'associates_degree', 'professional_degree'],
        ['white', 'black', 'asian', 'hispanic'],
        ['men', 'woman'],
    )
    for columns in column_groups:
        # DataFrame.plot returns the matplotlib Axes of a new figure.
        ax = df.plot(x='year_month', y=columns)
        ax.set_xlabel('Period')
        ax.set_ylabel('Unemployment percentage')


def main() -> None:
    """Prompt for connection settings, draw the graphs, then show them."""
    # An empty answer falls back to the default value.
    db_host = input('Nom d\'hôte : ') or 'berlin'
    db_name = input('Nom de la base de données : ') or 'dbclfreville2'
    db_user = input('Utilisateur : ') or 'clfreville2'
    db_password = getpass('Mot de passe : ')
    connection = psycopg2.connect(host=db_host, port=5432, database=db_name,
                                  user=db_user, password=db_password)
    try:
        # First run only: uncomment to create and populate the table.
        # create_table(connection, 'unemployment_data_us.csv')
        graph_evolution(connection)
    finally:
        # The data is already loaded into memory, so the connection can be
        # released before the (blocking) plot windows are shown.
        connection.close()
    plt.show()


if __name__ == '__main__':
    main()