You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

172 lines
6.8 KiB

import pandas
import psycopg2 # pip3 install types-psycopg2
import matplotlib.pyplot as plt
from getpass import getpass
import json
import folium
from folium.plugins import BeautifyIcon
from typing import Union
def create_tables(connection: psycopg2.connection, filename: str):
    """Run the SQL script stored in ``filename`` and commit the result.

    The whole file content is sent to the server in a single ``execute``
    call.

    Args:
        connection: Open database connection the script is executed on.
        filename: Path of the SQL file to read.
    """
    # Read the script first so no cursor is opened when the file is missing.
    with open(filename) as f:
        script = f.read()
    cur = connection.cursor()
    try:
        cur.execute(script)
        connection.commit()
    finally:
        # Release the cursor even when the script or the commit fails.
        cur.close()
def load_currencies(connection: psycopg2.connection, filename: str):
    """Insert the currencies listed in a JSON file into the ``currency`` table.

    The file must hold a JSON object; each key/value pair is inserted as one
    row (the loop names them ``iso`` and ``full_name``). All rows are
    committed together once every insert succeeded.

    Args:
        connection: Open database connection used for the inserts.
        filename: Path of the JSON file to read.
    """
    # Binary mode lets json.load detect the UTF-8/16/32 encoding itself
    # instead of depending on the locale's default text encoding.
    with open(filename, 'rb') as f:
        currencies_by_iso = json.load(f)
    cur = connection.cursor()
    try:
        for iso, full_name in currencies_by_iso.items():
            cur.execute("INSERT INTO currency VALUES (%s, %s);", (iso, full_name))
        connection.commit()
    finally:
        # Release the cursor even when an insert or the commit fails.
        cur.close()
def format_price(price: Union[float, str]) -> float:
    """Convert a price read from the data set into a ``float``.

    Args:
        price: Either a number (returned as a float) or a string that may use
            ``,`` as a thousands separator, e.g. ``"1,250"``.

    Returns:
        The price as a float.

    Raises:
        ValueError: If a string price is not a valid number once the commas
            are removed.
    """
    # Integers are accepted too: they have no ``replace`` method, so sending
    # them down the string branch would raise AttributeError.
    if isinstance(price, (int, float)):
        return float(price)
    return float(price.replace(',', ''))
def format_award(award: str) -> int:
    """Translate an award label into its number of stars.

    Only the three exact star labels are recognised; any other value
    yields 0.
    """
    stars_by_label = {
        '1 MICHELIN Star': 1,
        '2 MICHELIN Stars': 2,
        '3 MICHELIN Stars': 3,
    }
    return stars_by_label.get(award, 0)
def color_award(stars: int) -> str:
    """Return the marker colour associated with a number of stars.

    Values other than 1, 2 or 3 get the fallback colour ``'#132b5e'``.
    """
    color_by_stars = {
        1: '#c49c48',
        2: '#c0c0c0',
        3: 'orange',
    }
    return color_by_stars.get(stars, '#132b5e')
def stars_radius(stars: int) -> int:
    """Return the marker radius for a number of stars.

    More than one star gives 5, exactly one star gives 4, anything else 3.
    """
    if stars == 1:
        return 4
    return 5 if stars > 1 else 3
def load_restaurants(connection: psycopg2.connection, filename: str):
    """Load the restaurants of a CSV file and link them to their cuisines.

    Every CSV row is inserted into ``restaurant``. Each entry of the row's
    ``Cuisine`` column (entries are separated by ``', '``) is inserted into
    ``cuisine`` if needed and linked to the restaurant through
    ``cuisine_restaurant``. Everything is committed once, after the last row.

    Args:
        connection: Open database connection used for the inserts.
        filename: Path of the CSV file; it must provide the columns Name,
            Location, MinPrice, MaxPrice, Currency, Latitude, Longitude,
            PhoneNumber, Url, WebsiteUrl, Award and Cuisine.
    """
    df = pandas.read_csv(filename)  # read_csv already returns a DataFrame
    cur = connection.cursor()
    try:
        for row in df.itertuples():
            # NOTE(review): only Currency is mapped from NaN to NULL; other
            # empty cells (e.g. PhoneNumber) are passed through as NaN --
            # confirm against the table definition whether that is intended.
            cur.execute(
                "INSERT INTO restaurant (name, location, min_price, max_price, currency, "
                "latitude, longitude, phone_number, url, website_url, stars) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id;",
                (
                    row.Name,
                    row.Location,
                    format_price(row.MinPrice),
                    format_price(row.MaxPrice),
                    row.Currency if not pandas.isnull(row.Currency) else None,
                    row.Latitude,
                    row.Longitude,
                    row.PhoneNumber,
                    row.Url,
                    row.WebsiteUrl,
                    format_award(row.Award),
                ),
            )
            restaurant_id = cur.fetchone()[0]
            # An empty Cuisine cell is read as NaN, which has no split().
            if pandas.isnull(row.Cuisine):
                continue
            for cuisine_name in row.Cuisine.split(', '):
                # DO UPDATE rather than DO NOTHING: with DO NOTHING the
                # statement returns no row on conflict, so the id of an
                # already existing cuisine could not be fetched.
                cur.execute("INSERT INTO cuisine (name) VALUES (%s) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id;", (cuisine_name,))
                cuisine_id = cur.fetchone()[0]
                cur.execute("INSERT INTO cuisine_restaurant VALUES (%s, %s);", (restaurant_id, cuisine_id))
        connection.commit()
    finally:
        # Release the cursor even when an insert or the commit fails.
        cur.close()
def cuisine(connection: psycopg2.connection):
    """Plot the 20 most common cuisine types as a horizontal bar chart.

    The chart is only created here; displaying it is left to the caller.

    Args:
        connection: Open database connection the query is run on.
    """
    df = pandas.read_sql("""SELECT c.name, COUNT(*) restaurants
FROM cuisine_restaurant cr
INNER JOIN cuisine c ON c.id = cr.cuisine
GROUP BY c.id
ORDER BY 2 DESC
LIMIT 20;""", con=connection)
    ax = df.plot.barh(x='name', y='restaurants', legend=False)
    # barh draws the first row at the bottom; flip the axis so that the most
    # common cuisine (first row of the ORDER BY ... DESC result) is on top.
    ax.invert_yaxis()
    ax.set_title('Most common cuisine types in the Michelin guide')
    # Horizontal bars: the counts run along x, the cuisine names along y.
    ax.set_xlabel('Number of restaurants')
    ax.set_ylabel('Cuisine type')
    ax.bar_label(ax.containers[0])  # Show values
def currencies(connection: psycopg2.connection):
    """Plot, as a vertical bar chart, the 10 currencies with the most restaurants.

    The chart is only created here; displaying it is left to the caller.
    """
    query = """SELECT c.iso, c.full_name, COUNT(*) restaurants
FROM restaurant r
INNER JOIN currency c ON c.iso = r.currency
GROUP BY c.iso
ORDER BY 3 DESC
LIMIT 10;"""
    counts = pandas.read_sql(query, con=connection)
    axes = counts.plot.bar(x='iso', y='restaurants', legend=False, rot=0)
    # Write each bar's value above it.
    axes.bar_label(axes.containers[0])
    axes.set_ylabel('Number of restaurants')
    axes.set_xlabel('Currency')
    axes.set_title('Most common accepted currencies in the Michelin guide')
def prices(connection: psycopg2.connection):
    """Plot a pie chart of EUR-priced restaurants grouped by minimum price range.

    The price ranges are computed by the SQL query; each slice is labelled
    with its range. Displaying the chart is left to the caller.
    """
    query = """SELECT CASE WHEN min_price < 10 THEN '< 10'
WHEN min_price >= 10 AND min_price < 15 THEN '[10;15['
WHEN min_price >= 15 AND min_price < 20 THEN '[15;20['
WHEN min_price >= 20 AND min_price < 30 THEN '[20;30['
WHEN min_price >= 30 AND min_price < 50 THEN '[30;50['
WHEN min_price >= 50 AND min_price < 100 THEN '[50;100['
WHEN min_price >= 100 AND min_price < 200 THEN '[100;200['
WHEN min_price >= 200 AND min_price < 500 THEN '[200;500['
WHEN min_price >= 500 AND min_price < 1000 THEN '[500;1000['
WHEN min_price >= 1000 THEN '>= 1000'
END min_price_range,
COUNT(*) nb
FROM restaurant
WHERE currency = 'EUR'
GROUP BY min_price_range;"""
    ranges = pandas.read_sql(query, con=connection)
    range_labels = ranges['min_price_range']
    ranges.plot.pie(y='nb', labels=range_labels, legend=False)
def map(connection: psycopg2.connection, output_path: str = '/tmp/test_map.html'):
    """Build an interactive map of all restaurants and save it as HTML.

    One toggleable layer is created per award level (Bib Gourmand, 1, 2 and
    3 stars). Three-star restaurants are drawn as star icons, all others as
    filled circles whose colour and radius depend on their number of stars.

    NOTE: this function shadows the built-in ``map``; the name is kept
    because callers use it.

    Args:
        connection: Open database connection the restaurants are read from.
        output_path: File the HTML map is written to.
    """
    # Index = number of stars. Shared by the layer names and the popups so
    # both always agree (and "1 star" is not pluralised).
    award_labels = ['Bib Gourmand', '1 star', '2 stars', '3 stars']
    map_osm = folium.Map(zoom_start=4)
    groups = [folium.FeatureGroup(label) for label in award_labels]
    df = pandas.read_sql("""SELECT latitude, longitude, name, min_price, max_price, currency, stars FROM restaurant;""", con=connection)
    columns = ['latitude', 'longitude', 'name', 'min_price', 'max_price', 'currency', 'stars']
    # The first element of each tuple is the DataFrame index, unused here.
    for _, latitude, longitude, name, min_price, max_price, currency, stars in df[columns].itertuples():
        popup = (
            f"<strong>{name}</strong><br>"
            f"From {min_price} to {max_price} {currency}<br>"
            f"{award_labels[stars]}<br>"
            "______________________"
        )
        color = color_award(stars)
        location = [latitude, longitude]
        radius = stars_radius(stars)
        if stars == 3:
            icon = BeautifyIcon(icon='star', inner_icon_style=('color:' + color), border_color='transparent', background_color='transparent')
            marker = folium.Marker(location=location, popup=popup, radius=radius, fill=True, fill_opacity=1, icon=icon)
        else:
            marker = folium.CircleMarker(location=location, popup=popup, color=color, fill_color=color, radius=radius, fill=True, fill_opacity=1)
        marker.add_to(groups[stars])
    for group in groups:
        map_osm.add_child(group)
    # Layer control lets the viewer show or hide each award level.
    map_osm.add_child(folium.map.LayerControl())
    map_osm.save(output_path)
if __name__ == '__main__':
    # Ask for the connection settings; an empty answer selects the default.
    db_host = input('Nom d\'hôte : ') or 'berlin'
    db_name = input('Nom de la base de données : ') or 'dbclfreville2'
    db_user = input('Utilisateur : ') or 'clfreville2'
    db_password = getpass('Mot de passe : ')
    connection = psycopg2.connect(
        host=db_host,
        port=5432,
        database=db_name,
        user=db_user,
        password=db_password,
    )

    # Currently disabled steps, left commented out as in the original script.
    #create_tables(connection, 'table.sql')
    #load_currencies(connection, 'currencies.json')
    #load_restaurants(connection, 'michelin_my_maps.csv')

    # Build the charts while the connection is open.
    cuisine(connection)
    currencies(connection)
    prices(connection)
    #map(connection)

    connection.close()
    # Display every chart created above.
    plt.show()