# IUT/BDD/tp/BDDs2/tp1/TPcf/sae/load0.py

import pandas
import psycopg2  # pip3 install types-psycopg2
import matplotlib.pyplot as plt
from getpass import getpass
from calendar import month_abbr
from typing import Dict

# Lookup table from an abbreviated month name (as listed by
# calendar.month_abbr, e.g. 'Jan') to its month number (1-12).
# calendar.month_abbr starts with an empty string at index 0, so the table
# also contains the entry '' -> 0.
month_to_int: Dict[str, int] = {
    abbreviation: number for number, abbreviation in enumerate(month_abbr)
}

def create_table(connection: "psycopg2.extensions.connection", filename: str) -> None:
    """Create the ``unemployment`` table if it is missing, then load a CSV into it.

    The table has one row per month (``year_month`` is the primary key) and
    ten non-negative ``NUMERIC(4, 2)`` columns, one per education level,
    ethnic group and gender.

    Args:
        connection: Open psycopg2 connection. It is left open; the caller
            owns it. The annotation is quoted because psycopg2 exposes the
            class as ``psycopg2.extensions.connection``; a bare
            ``psycopg2.connection`` is not available at runtime.
        filename: Path of the CSV file to load (see ``append_data`` for the
            expected columns).

    Raises:
        Whatever psycopg2 / pandas raise on SQL or file errors, and KeyError
        for an unknown month abbreviation (propagated from ``append_data``).
    """
    # The cursor context manager closes the cursor even if the DDL fails.
    with connection.cursor() as cur:
        cur.execute("""CREATE TABLE IF NOT EXISTS unemployment (
    year_month DATE PRIMARY KEY,
    primary_school NUMERIC(4, 2) NOT NULL CHECK (primary_school >= 0),
    high_school NUMERIC(4, 2) NOT NULL CHECK (high_school >= 0),
    associates_degree NUMERIC(4, 2) NOT NULL CHECK (associates_degree >= 0),
    professional_degree NUMERIC(4, 2) NOT NULL CHECK (professional_degree >= 0),
    white NUMERIC(4, 2) NOT NULL CHECK (white >= 0),
    black NUMERIC(4, 2) NOT NULL CHECK (black >= 0),
    asian NUMERIC(4, 2) NOT NULL CHECK (asian >= 0),
    hispanic NUMERIC(4, 2) NOT NULL CHECK (hispanic >= 0),
    men NUMERIC(4, 2) NOT NULL CHECK (men >= 0),
    woman NUMERIC(4, 2) NOT NULL CHECK (woman >= 0)
);""")

    # Filling the table is exactly what append_data does, so delegate instead
    # of duplicating the insert code. append_data commits on this same
    # connection, which also commits the CREATE TABLE issued above.
    append_data(connection, filename)

def append_data(connection: "psycopg2.extensions.connection", filename: str) -> None:
    """Insert the rows of a CSV file into the ``unemployment`` table.

    Dataset reference left by the original author:
    https://datahub.io/core/employment-us

    The CSV must provide the columns ``Year``, ``Month`` (abbreviated month
    name found in ``month_to_int``), ``Primary_School``, ``High_School``,
    ``Associates_Degree``, ``Professional_Degree``, ``White``, ``Black``,
    ``Asian``, ``Hispanic``, ``Men`` and ``Women``. Rows containing any
    missing value are skipped.

    Args:
        connection: Open psycopg2 connection. It is left open; the caller
            owns it. The annotation is quoted because psycopg2 exposes the
            class as ``psycopg2.extensions.connection``; a bare
            ``psycopg2.connection`` is not available at runtime.
        filename: Path of the CSV file to read.

    Raises:
        KeyError: If a ``Month`` value is not a key of ``month_to_int``.
        Whatever psycopg2 / pandas raise on SQL or file errors (for example
        a duplicate ``year_month``); in that case nothing is committed.
    """
    # read_csv already returns a DataFrame; drop incomplete rows because every
    # column of the table is NOT NULL.
    df = pandas.read_csv(filename).dropna()

    rows = [
        (
            # int() guards against Year having been parsed as a float
            # (e.g. 2010.0), which would produce an invalid date string.
            f"{int(row.Year)}-{month_to_int[row.Month]}-1",
            row.Primary_School,
            row.High_School,
            row.Associates_Degree,
            row.Professional_Degree,
            row.White,
            row.Black,
            row.Asian,
            row.Hispanic,
            row.Men,
            row.Women,
        )
        for row in df.itertuples(index=False)
    ]

    # "with connection" commits on success and rolls back on error (it does
    # not close the connection); the cursor context manager closes the cursor.
    with connection:
        with connection.cursor() as cur:
            # Explicit column list so the insert does not depend on the
            # physical column order of the table. Note that the CSV column
            # "Women" is stored in the table column "woman".
            cur.executemany(
                "INSERT INTO unemployment ("
                "year_month, primary_school, high_school, associates_degree, "
                "professional_degree, white, black, asian, hispanic, men, woman"
                ") VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                rows,
            )

def graph_evolution(connection: "psycopg2.extensions.connection") -> None:
    """Plot the unemployment rate over time, one figure per category family.

    Reads the whole ``unemployment`` table ordered by ``year_month`` and
    draws three line plots against that date: education levels, ethnic
    groups, and gender. The plots are only created here; nothing is
    displayed until the caller invokes ``matplotlib.pyplot.show()``.

    Args:
        connection: Open psycopg2 connection used to run the query. It is
            left open; the caller owns it. The annotation is quoted because
            psycopg2 exposes the class as ``psycopg2.extensions.connection``;
            a bare ``psycopg2.connection`` is not available at runtime.
    """
    df = pandas.read_sql("""SELECT * FROM unemployment ORDER BY year_month;""", con=connection)

    # One plot per family of columns; all of them share the same axis labels.
    column_groups = (
        ['primary_school', 'high_school', 'associates_degree', 'professional_degree'],
        ['white', 'black', 'asian', 'hispanic'],
        ['men', 'woman'],
    )
    for columns in column_groups:
        # DataFrame.plot returns the matplotlib Axes it drew on.
        axes = df.plot(x='year_month', y=columns)
        axes.set_xlabel('Period')
        axes.set_ylabel('Unemployment percentage')

if __name__ == '__main__':
    # Ask for the connection settings; an empty answer selects the default.
    db_host = input('Nom d\'hôte : ') or 'berlin'
    db_name = input('Nom de la base de données : ') or 'dbclfreville2'
    db_user = input('Utilisateur : ') or 'clfreville2'
    # getpass keeps the password from being echoed to the terminal.
    db_password = getpass('Mot de passe : ')

    connection = psycopg2.connect(host=db_host, port=5432, database=db_name, user=db_user, password=db_password)
    try:
        # One-time setup: uncomment to create the table and load the CSV.
        #create_table(connection, 'unemployment_data_us.csv')
        graph_evolution(connection)
    finally:
        # Release the connection even if the query or plotting fails.
        connection.close()

    # The data is already loaded into the figures, so they can be displayed
    # after the connection has been closed.
    plt.show()