-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_vis.py
204 lines (183 loc) · 8.11 KB
/
data_vis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
from dash import Dash, dcc, html, Input, Output, State, ctx
from io import StringIO
from jupyter_dash import JupyterDash
from os.path import exists
import dash
import dash_bootstrap_components as dbc
import json
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import requests
# Make Plotly's dark template the default for every figure built below.
pio.templates.default = "plotly_dark"

# Station locations are read from a spreadsheet published online, so that
# every measuring station can be placed on a map.
# Note: the sheet's headers are inconsistent, so columns are renamed here.
# The rename treats the column labelled 'Longitude' as the latitude and the
# column labelled 'NO2' as the longitude (the original 'Latitude' is dropped).
site_locations_url = 'https://www.lcsqa.org/system/files/media/documents/Liste%20points%20de%20mesures%202020%20pour%20site%20LCSQA_221292021.xlsx'
site_locations = (
    pd.read_excel(site_locations_url, sheet_name='Points de mesure', header=2)
    .rename(columns={
        'Code station': 'code site',
        'Latitude': 'unused',
        'Longitude': 'Latitude',
        'NO2': 'Longitude',
    })
    .loc[:, ['code site', 'Nom station', 'Latitude', 'Longitude']]
    # Keep only rows with Latitude > 0 and Longitude > -6
    # (presumably restricts the map to metropolitan France -- TODO confirm).
    .loc[lambda locs: (locs['Latitude'] > 0) & (locs['Longitude'] > -6)]
)
# Load pollution data
# Only a subset of the available dates is loaded by default, for speed.
def get_data(n_months=1, n_days=14, feather_file='pollution_data.feather'):
    """Return the pollution measurements as a single DataFrame.

    If ``feather_file`` already exists it is read and returned as-is; in that
    case ``n_months`` and ``n_days`` are ignored. Otherwise one CSV per day is
    downloaded for days ``1..n_days`` of months ``1..n_months`` of 2021 (the
    year is fixed in the URL template), the results are concatenated, cached
    to ``feather_file`` and returned.

    Days whose download fails (network error or non-OK HTTP status) are
    skipped. Missing values in the 'valeur' and 'valeur brute' columns are
    replaced by 0.

    Args:
        n_months: Number of months to load, starting from January.
        n_days: Number of days to load in each month, starting from the 1st.
        feather_file: Path of the Feather cache file.

    Returns:
        A DataFrame with the wanted columns plus an 'index' column holding
        each row's position inside its source CSV.

    Raises:
        ValueError: If no daily file could be downloaded at all.
    """
    if exists(feather_file):
        return pd.read_feather(feather_file)

    url_tpl = 'https://files.data.gouv.fr/lcsqa/concentrations-de-polluants-atmospheriques-reglementes/temps-reel/2021/FR_E2_2021-{:02}-{:02}.csv'
    wanted_columns = ['Date de début', 'Date de fin', 'code site', 'nom site', 'Polluant', 'valeur', 'valeur brute', 'unité de mesure', 'validité']
    date_columns = ['Date de début', 'Date de fin']
    value_columns = ['valeur', 'valeur brute']

    all_dfs = []
    for month in range(1, n_months + 1):
        for day in range(1, n_days + 1):
            url = url_tpl.format(month, day)
            print('Loading {}'.format(url))
            try:
                # A timeout keeps one stalled download from hanging the app.
                response = requests.get(url, timeout=30)
            except requests.RequestException as err:
                print('Skipping {}: {}'.format(url, err))
                continue
            if not response.ok:
                continue
            csv_str = response.content.decode('utf-8')
            # .copy() so the date conversion below writes to an independent
            # frame rather than to a slice of the parsed CSV.
            day_df = pd.read_csv(StringIO(csv_str), sep=';')[wanted_columns].copy()
            for column in date_columns:
                day_df[column] = pd.to_datetime(day_df[column])
            all_dfs.append(day_df)

    if not all_dfs:
        # pd.concat([]) would raise a less helpful ValueError.
        raise ValueError('No pollution data could be downloaded')

    # reset_index() (without drop) replaces the repeated per-file indices by a
    # fresh 0..n-1 index and keeps the old ones in an 'index' column.
    df = pd.concat(all_dfs).reset_index()
    # fillna returns a new object, so its result has to be assigned. Only the
    # measurement columns are filled so that the date and text columns keep
    # their dtypes.
    df[value_columns] = df[value_columns].fillna(0)
    df.to_feather(feather_file)
    return df
df = get_data()

# Distinct pollutant names and site names present in the measurements.
polluants = df['Polluant'].unique()
site_names = df['nom site'].unique()

# Only keep the locations whose station name appears in the pollution data.
site_locations = site_locations.loc[site_locations['Nom station'].isin(site_names)]

# Mean of 'valeur' per site code (one row per 'code site'), used for plotting.
df_mean_pollution = df.groupby(['code site'])[['valeur']].mean()
fig = go.Figure()

# First layer: slightly larger dark markers that act as an outline for the
# data markers drawn on top of them.
fig.add_trace(go.Scattermapbox(
    lat=site_locations['Latitude'],
    lon=site_locations['Longitude'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        size=12,
        color='#333333',
    ),
))

# df_mean_pollution is indexed by 'code site' (in group order, for every site
# in the measurements), whereas the markers follow the row order of
# site_locations. Reindex on the station codes so that the i-th colour value
# belongs to the i-th marker; stations without a matching code get NaN.
site_mean_values = (
    df_mean_pollution['valeur']
    .reindex(site_locations['code site'])
    .to_numpy()
)

# Second layer: the actual data, coloured by average pollution level.
fig.add_trace(go.Scattermapbox(
    lat=site_locations['Latitude'],
    lon=site_locations['Longitude'],
    hovertemplate=(
        '<b>%{text}</b><br>' +
        '%{lat}°<b>N</b> %{lon}°<b>E</b>'
    ),
    # The station name is also what the selection callback reads back from
    # each selected point.
    text=site_locations["Nom station"],
    marker=go.scattermapbox.Marker(
        size=10,
        color=site_mean_values,
        opacity=0.9,
        colorscale=['#882211', '#995544', '#aa9988'],
    ),
))
# Map view: dark basemap centred at 46.5 N, 3 E. Clicking a marker both fires
# an event and selects it ('event+select').
fig.update_layout(
    autosize=True,
    margin=dict(r=10, t=30, l=10, b=10),
    hovermode='closest',
    clickmode='event+select',
    showlegend=False,
    mapbox={
        'bearing': 0,
        'style': "carto-darkmatter",
        'zoom': 5,
        'center': go.layout.mapbox.Center(lat=46.5, lon=3),
    },
)

# Selection styling: unselected markers of both layers are dimmed, and
# selected data markers are drawn larger, fully opaque and in a highlight
# colour.
fig.data[0].update(unselected=dict(marker=dict(opacity=0.7)))
fig.data[1].update(
    selected=dict(marker=dict(size=14, color='#bb8822', opacity=1)),
    unselected=dict(marker=dict(opacity=0.7)),
)
# Ten 'rgb(r, g, b)' colour strings, one per line.
DEFAULT_PLOTLY_COLORS = [
    'rgb(31, 119, 180)',
    'rgb(255, 127, 14)',
    'rgb(44, 160, 44)',
    'rgb(214, 39, 40)',
    'rgb(148, 103, 189)',
    'rgb(140, 86, 75)',
    'rgb(227, 119, 194)',
    'rgb(127, 127, 127)',
    'rgb(188, 189, 34)',
    'rgb(23, 190, 207)',
]
# Build small example app.
app = JupyterDash(__name__)

# PM10 measurements of the 'Lyon Périphérique' site.
pollution_for_selected_site = df.loc[
    (df['nom site'] == 'Lyon Périphérique') & (df['Polluant'] == 'PM10')
]
def create_empty_figure(polluant):
    """Return a figure without traces whose bold title is ``polluant``.

    The layout uses the title position (x=0.03, y=0.8), a 30-unit top margin
    with no other margins, and no axis titles.
    """
    layout_options = {
        'title': f'<b>{polluant}</b>',
        'title_y': 0.8,
        'title_x': 0.03,
        'margin': {'l': 0, 'r': 0, 't': 30, 'b': 0},
        'yaxis_title': None,
        'xaxis_title': None,
    }
    empty_fig = go.Figure()
    empty_fig.update_layout(**layout_options)
    return empty_fig
# Checklist of pollutants; every pollutant is ticked initially.
checklist = html.Div(
    id='checklist_div',
    children=[
        dcc.Checklist(
            options=polluants,
            value=polluants,
            inline=True,
            id='pollution_checklist',
        ),
    ],
)

# Page layout: a title, the station map, and a panel holding the checklist
# plus the 'line_graphs' container that the selection callback fills in.
app.layout = html.Div(
    className='parent',
    children=[
        html.H1('France Pollution Visualization', className='page_title'),
        dcc.Graph(className='child', id='map_sensors', figure=fig),
        html.Div(
            className='child',
            children=[
                checklist,
                html.Div(
                    [html.H1('Select location on map to display pollution data')],
                    className='header_div',
                    id='line_graphs',
                ),
            ],
        ),
    ],
)
# Qualitative Plotly Express palettes; the selection callback picks one per
# pollutant by position.
color_palettes = [
    getattr(px.colors.qualitative, palette_name)
    for palette_name in (
        'Plotly',
        'D3',
        'Antique',
        'Bold',
        'Safe',
        'Vivid',
        'Set3',
        'Dark2',
    )
]
# Builds all the line graphs from the selected point(s)
def _selected_site_names(selected_data):
    """Return the distinct station names of the selected points, in order.

    Points without a 'text' entry are skipped: only the data trace of the map
    attaches the station name to its points, the outline trace does not.
    """
    if not selected_data:
        return []
    names = []
    for point in selected_data.get('points') or []:
        name = point.get('text')
        if name is not None and name not in names:
            names.append(name)
    return names


@app.callback(
    Output('line_graphs', 'children'),
    Input('map_sensors', 'selectedData'),
    Input('pollution_checklist', 'value'))
def display_selected_data(selectedData, value):
    """Build one line graph per ticked pollutant for the selected stations.

    Args:
        selectedData: The map's ``selectedData`` property; its 'points' list
            is read and each point's 'text' is taken as the station name.
            May be None or contain no points.
        value: The pollutant names currently ticked in the checklist.

    Returns:
        A list of components for the 'line_graphs' container: one
        ``dcc.Graph`` per ticked pollutant that has data for the selection
        (sharing the available height equally), a "no data" heading if none
        has, or the selection prompt when no station is selected.
    """
    site_names = _selected_site_names(selectedData)
    if not site_names:
        # Nothing usable is selected (no selection yet, an empty box/lasso
        # selection, or only outline markers).
        return [html.Div([html.H1('Select location on map to display pollution data')])]

    wanted_polluants = value or []
    pollution_for_selected_sites = [
        df[df['nom site'] == site_name] for site_name in site_names
    ]

    children = []
    for i, polluant in enumerate(polluants):
        if polluant not in wanted_polluants:
            continue
        pollution_for_polluant = pd.concat([
            site_df[site_df['Polluant'] == polluant]
            for site_df in pollution_for_selected_sites
        ])
        if pollution_for_polluant.empty:
            continue
        # Wrap around so that having more pollutants than palettes cannot
        # raise an IndexError.
        palette = color_palettes[i % len(color_palettes)]
        line_fig = px.line(pollution_for_polluant,
                           x='Date de début', y='valeur', color='nom site',
                           color_discrete_sequence=palette,
                           labels={"nom site": "Site"})
        line_fig.update_traces(line_width=1)
        line_fig.update_layout(title=f'<b>{polluant}</b>',
                               title_y=0.8, title_x=0.03,
                               margin={'l': 0, 'r': 0, 't': 30, 'b': 0},
                               yaxis_title=None, xaxis_title=None)
        # Dots are replaced in the component id.
        children.append(dcc.Graph(id=polluant.replace('.', '_'),
                                  figure=line_fig, className='line-graph'))

    if not children:
        return [html.H1('No data for locations: {}.'.format(site_names))]

    # Split the container height evenly between the graphs.
    graph_height = '{}%'.format(100/len(children))
    for graph in children:
        graph.style = {'height': graph_height}
    return children
# Script entry point: start the app's server only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    # mode='external' is passed through to JupyterDash.run_server
    # (presumably serves the app at a URL to open in a separate browser
    # tab -- confirm against the jupyter-dash documentation).
    app.run_server(mode='external')