You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

85 lines
3.4 KiB

import pandas
import psycopg2 # pip3 install types-psycopg2
import matplotlib.pyplot as plt
from getpass import getpass
from calendar import month_abbr
from typing import Dict
# Lookup table from an abbreviated month name (the entries of
# calendar.month_abbr, e.g. 'Jan') to its month number.
# month_abbr[0] is an empty-string placeholder, so '' maps to 0 as well.
month_to_int: Dict[str, int] = {
    abbreviation: number for number, abbreviation in enumerate(month_abbr)
}
def create_table(connection: psycopg2.extensions.connection, filename: str):
    """Create the ``unemployment`` table if needed and load a CSV file into it.

    The table holds one row per ``year_month`` date, with one non-negative
    ``NUMERIC(4, 2)`` column per education level, ethnic group and gender.
    Loading the rows is delegated to ``append_data``, which also commits
    the transaction.

    Args:
        connection: Open psycopg2 connection to the target database.
        filename: Path of the CSV file handed to ``append_data``.
    """
    # The context manager closes the cursor even if the statement fails.
    with connection.cursor() as cur:
        # NOTE: the last column is spelled ``woman`` (singular);
        # graph_evolution selects it under that exact name.
        cur.execute("""CREATE TABLE IF NOT EXISTS unemployment (
            year_month DATE PRIMARY KEY,
            primary_school NUMERIC(4, 2) NOT NULL CHECK (primary_school >= 0),
            high_school NUMERIC(4, 2) NOT NULL CHECK (high_school >= 0),
            associates_degree NUMERIC(4, 2) NOT NULL CHECK (associates_degree >= 0),
            professional_degree NUMERIC(4, 2) NOT NULL CHECK (professional_degree >= 0),
            white NUMERIC(4, 2) NOT NULL CHECK (white >= 0),
            black NUMERIC(4, 2) NOT NULL CHECK (black >= 0),
            asian NUMERIC(4, 2) NOT NULL CHECK (asian >= 0),
            hispanic NUMERIC(4, 2) NOT NULL CHECK (hispanic >= 0),
            men NUMERIC(4, 2) NOT NULL CHECK (men >= 0),
            woman NUMERIC(4, 2) NOT NULL CHECK (woman >= 0)
        );""")
    # Reuse the shared loader instead of duplicating the insert loop here.
    append_data(connection, filename)
def append_data(connection: psycopg2.extensions.connection, filename: str):
    """Insert every complete row of a CSV file into the ``unemployment`` table.

    The CSV must provide the columns ``Year``, ``Month``, ``Primary_School``,
    ``High_School``, ``Associates_Degree``, ``Professional_Degree``,
    ``White``, ``Black``, ``Asian``, ``Hispanic``, ``Men`` and ``Women``.
    ``Month`` must be a key of ``month_to_int``. Rows containing a missing
    value are dropped before insertion. Each row is stored under the first
    day of its month.

    Data source noted by the original author:
    https://datahub.io/core/employment-us

    Args:
        connection: Open psycopg2 connection to the target database.
        filename: Path of the CSV file to load.

    Raises:
        KeyError: If a ``Month`` value is not a key of ``month_to_int``.
    """
    frame = pandas.read_csv(filename).dropna()
    records = [
        (
            # ISO date for the first day of the month. int() guards against
            # pandas having read the year column as floats (e.g. 2010.0).
            f"{int(row.Year):04d}-{month_to_int[row.Month]:02d}-01",
            row.Primary_School,
            row.High_School,
            row.Associates_Degree,
            row.Professional_Degree,
            row.White,
            row.Black,
            row.Asian,
            row.Hispanic,
            row.Men,
            row.Women,
        )
        for row in frame.itertuples()
    ]
    # `with connection` commits on success and rolls back on error; the
    # cursor context manager closes the cursor in both cases. Neither of
    # them closes the connection itself.
    with connection, connection.cursor() as cur:
        # Naming the columns keeps the insert valid if the table's column
        # order ever changes.
        cur.executemany(
            """INSERT INTO unemployment (
                year_month, primary_school, high_school, associates_degree,
                professional_degree, white, black, asian, hispanic, men, woman
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);""",
            records,
        )
def graph_evolution(connection: psycopg2.extensions.connection):
    """Plot the unemployment figures over time, one chart per category.

    Reads the whole ``unemployment`` table ordered by ``year_month`` and
    draws three plots against that column: education levels, ethnic groups
    and genders. The plots are only created here; this function does not
    display them.

    Args:
        connection: Open psycopg2 connection to the database holding the
            ``unemployment`` table.
    """
    df = pandas.read_sql("""SELECT * FROM unemployment ORDER BY year_month;""", con=connection)
    # One plot per group of related columns.
    column_groups = (
        ['primary_school', 'high_school', 'associates_degree', 'professional_degree'],
        ['white', 'black', 'asian', 'hispanic'],
        ['men', 'woman'],
    )
    for columns in column_groups:
        axes = df.plot(x='year_month', y=columns)
        axes.set_xlabel('Period')
        axes.set_ylabel('Unemployement percentage')
if __name__ == '__main__':
    # Prompt for the connection settings; an empty answer selects the default.
    db_host = input('Nom d\'hôte : ') or 'berlin'
    db_name = input('Nom de la base de données : ') or 'dbclfreville2'
    db_user = input('Utilisateur : ') or 'clfreville2'
    # getpass keeps the password from being echoed to the terminal.
    db_password = getpass('Mot de passe : ')
    connection = psycopg2.connect(host=db_host, port=5432, database=db_name, user=db_user, password=db_password)
    try:
        # One-time setup: uncomment to create the table and load the CSV file.
        # create_table(connection, 'unemployment_data_us.csv')
        graph_evolution(connection)
    finally:
        # Release the connection even if querying or plotting fails.
        connection.close()
    # The data is already in memory, so the charts can be shown after the
    # connection has been closed.
    plt.show()