基于Python的多城市空气质量数据可视化

一、多城市空气质量数据可视化

数据:2018年北京、上海、广州、深圳四座城市的空气质量数据

  • beijing_AQI_2018.csv
  • shanghai_AQI_2018.csv
  • guangzhou_AQI_2018.csv
  • shenzhen_AQI_2018.csv

1. 2018年广州AQI全年走势

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Line

# Daily AQI trend for Guangzhou in 2018, rendered as an area-filled line chart.
# Only the 'Date' and 'AQI' columns of the CSV are read.
data = pd.read_csv('guangzhou_AQI_2018.csv')
dates = list(data['Date'])
aqi_values = list(data['AQI'])

# Series decorations: a mark line at the series average and diamond-shaped
# mark points on the maximum and minimum values.
average_line = opts.MarkLineOpts(
    data=[opts.MarkLineItem(type_="average", name="平均值")],
)
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_="max"), opts.MarkPointItem(type_="min")],
    symbol='diamond',
    symbol_size=35,
)

l = Line()
l.add_xaxis(dates)  # x axis: one entry per row of the CSV
l.add_yaxis(
    "",
    aqi_values,
    areastyle_opts=opts.AreaStyleOpts(opacity=0.2, color='#009999'),
)
# Centre the title horizontally.
l.set_global_opts(
    title_opts=opts.TitleOpts(title="2018年广州AQI全年走势图\nGiesen", pos_left='center'),
)
l.set_series_opts(markline_opts=average_line, markpoint_opts=extreme_points)
l.render_notebook()

2. 2018年深圳AQI全年走势

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Line

# Daily AQI trend for Shenzhen in 2018, rendered as an area-filled line chart.
# Only the 'Date' and 'AQI' columns of the CSV are read.
data = pd.read_csv('shenzhen_AQI_2018.csv')
dates = list(data['Date'])
aqi_values = list(data['AQI'])

# Series decorations: a mark line at the series average and diamond-shaped
# mark points on the maximum and minimum values.
average_line = opts.MarkLineOpts(
    data=[opts.MarkLineItem(type_="average", name="平均值")],
)
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_="max"), opts.MarkPointItem(type_="min")],
    symbol='diamond',
    symbol_size=35,
)

l = Line()
l.add_xaxis(dates)  # x axis: one entry per row of the CSV
l.add_yaxis(
    "",
    aqi_values,
    areastyle_opts=opts.AreaStyleOpts(opacity=0.3, color='#FFFF33'),
)
# Centre the title horizontally.
l.set_global_opts(
    title_opts=opts.TitleOpts(title="2018年深圳AQI全年走势图\nGiesen", pos_left='center'),
)
l.set_series_opts(markline_opts=average_line, markpoint_opts=extreme_points)
l.render_notebook()

3. 2018年北上广深AQI全年走势

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Line

# Monthly mean AQI of the four cities, drawn as four series on one line chart.
# The file order here must match the order of the series names used below.
filename = [
    'beijing_AQI_2018.csv',
    'shanghai_AQI_2018.csv',
    'guangzhou_AQI_2018.csv',
    'shenzhen_AQI_2018.csv',
]
month_aqi_list = []

for csv_path in filename:
    data = pd.read_csv(csv_path)
    # The month is the second '/'-separated field of each 'Date' value.
    data['month'] = [day.split('/')[1] for day in data['Date']]
    month_aqi = (
        data.groupby(['month'])['AQI']
        .agg(['mean'])
        .reset_index()
        # Casting the whole frame to int truncates the means and also turns the
        # month strings into numbers, so the sort below is numeric, not lexical.
        .astype('int')
        .sort_values(by='month')
    )
    month_aqi_list.append(list(month_aqi['mean']))

# x-axis labels: "1月" .. "12月".
month = [str(number) + '月' for number in range(1, 13)]

l = Line()
l.add_xaxis(month)
for city_name, monthly_means in zip(("北京", "上海", "广州", "深圳"), month_aqi_list):
    l.add_yaxis(city_name, monthly_means)
l.set_global_opts(
    title_opts=opts.TitleOpts(title="2018年北上广深AQI全年走势图\nGiesen", pos_left='center'),
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    legend_opts=opts.LegendOpts(is_show=True, pos_bottom="0%"),
)
l.render_notebook()

4. 2018年广州月均AQI走势

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Line

# Monthly mean AQI for Guangzhou in 2018 as a single-series line chart.
data = pd.read_csv('guangzhou_AQI_2018.csv')

# The month is the second '/'-separated field of each 'Date' value.
data['month'] = [day.split('/')[1] for day in data['Date']]
month_aqi = (
    data.groupby(['month'])['AQI']
    .agg(['mean'])
    .reset_index()
    # Casting the whole frame to int truncates the means and also turns the
    # month strings into numbers, so the sort below is numeric, not lexical.
    .astype('int')
    .sort_values(by='month')
)

# x-axis labels: "1月" .. "12月".
month = [str(number) + '月' for number in range(1, 13)]

# Mark the lowest and highest monthly values; per-point labels are hidden.
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_='min'), opts.MarkPointItem(type_='max')],
)

l = Line()
l.add_xaxis(month)
l.add_yaxis('', list(month_aqi['mean']))
l.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年广州月均AQI走势图\nGiesen', pos_left='center', pos_top=20),
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
)
l.set_series_opts(
    label_opts=opts.LabelOpts(is_show=False),
    markpoint_opts=extreme_points,
)
l.render_notebook()

5. 2018年深圳月均AQI走势

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Line

# Monthly mean AQI for Shenzhen in 2018 as a single-series line chart.
data = pd.read_csv('shenzhen_AQI_2018.csv')

# The month is the second '/'-separated field of each 'Date' value.
data['month'] = [day.split('/')[1] for day in data['Date']]
month_aqi = (
    data.groupby(['month'])['AQI']
    .agg(['mean'])
    .reset_index()
    # Casting the whole frame to int truncates the means and also turns the
    # month strings into numbers, so the sort below is numeric, not lexical.
    .astype('int')
    .sort_values(by='month')
)

# x-axis labels: "1月" .. "12月".
month = [str(number) + '月' for number in range(1, 13)]

# Mark the lowest and highest monthly values; per-point labels are hidden.
extreme_points = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_='min'), opts.MarkPointItem(type_='max')],
)

l = Line()
l.add_xaxis(month)
l.add_yaxis('', list(month_aqi['mean']))
l.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年深圳月均AQI走势图\nGiesen', pos_left='center', pos_top=20),
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
)
l.set_series_opts(
    label_opts=opts.LabelOpts(is_show=False),
    markpoint_opts=extreme_points,
)
l.render_notebook()

6. 2018年广州季度AQI箱型图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Boxplot
# Box plot of Guangzhou's daily AQI values in 2018, grouped by calendar quarter.
data = pd.read_csv('guangzhou_AQI_2018.csv')

# One bucket of daily AQI values per quarter: index 0 is Q1, index 3 is Q4.
quarter_aqi = [[], [], [], []]
for date_text, aqi_value in zip(data['Date'], data['AQI']):
    # Parse the month numerically so that both "2018/1/5" and "2018/01/05"
    # land in the right quarter; comparing the raw text against '1'..'9'
    # would silently push every zero-padded month into the fourth quarter.
    month = int(date_text.split('/')[1])
    if not 1 <= month <= 12:
        raise ValueError('unexpected month in Date value: {!r}'.format(date_text))
    quarter_aqi[(month - 1) // 3].append(aqi_value)
quarter1, quarter2, quarter3, quarter4 = quarter_aqi

b = Boxplot()
b.add_xaxis(['第一季度', '第二季度', '第三季度', '第四季度'])
# prepare_data turns each raw value list into the summary the box plot series expects.
b.add_yaxis('', b.prepare_data(quarter_aqi))
b.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年广州季度AQI箱型图\nGiesen', pos_left='center'),
    yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
)
b.render_notebook()

7. 2018年广州全年空气质量情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Pie
# Ring chart showing how many rows of the Guangzhou CSV fall in each 'Quality_grade'.
data = pd.read_csv('guangzhou_AQI_2018.csv')

# Count rows per grade, move the grade out of the index into a regular column,
# then order the grades from most to least frequent.
quality_grade_count = (
    data.groupby(['Quality_grade'])['Quality_grade']
    .agg(['count'])
    .reset_index()
    .sort_values(by='count', ascending=False)
)

# The pie series takes [name, value] pairs.
grade_pairs = [
    [grade, occurrences]
    for grade, occurrences in zip(
        quality_grade_count['Quality_grade'], quality_grade_count['count']
    )
]

p = Pie()
p.add(
    '',
    grade_pairs,
    radius=["40%", "70%"],  # inner and outer radius, which gives the ring shape
    label_opts=opts.LabelOpts(formatter='{b}:{d}%'),
)
p.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年广州全年空气质量情况\nGiesen', pos_left='center'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_left='10%', pos_top='10%'),
)
p.set_series_opts()
p.render_notebook()

8. 2018年北上广深全年空气质量情况环形图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Ring chart (matplotlib) of the combined air-quality grade counts of the four cities.
city_files = [
    'beijing_AQI_2018.csv',
    'shanghai_AQI_2018.csv',
    'guangzhou_AQI_2018.csv',
    'shenzhen_AQI_2018.csv',
]

# Per-city row counts for each 'Quality_grade'; the grade stays in the index.
per_city_counts = [
    pd.read_csv(csv_path).groupby(['Quality_grade'])['Quality_grade'].agg(['count'])
    for csv_path in city_files
]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way and also works on older pandas releases.
quality_list = pd.concat(per_city_counts)
quality_list_sum = quality_list.groupby('Quality_grade')['count'].agg(['sum'])
quality_list_sum.reset_index(inplace=True)

# Total number of counted rows, computed once instead of on every label call.
total_count = int(np.sum(quality_list_sum['sum']))


def _wedge_label(pct):
    """Format a wedge label as its percentage plus the absolute count."""
    # Round rather than truncate: pct is a float, so pct / 100 * total can come
    # out marginally below the true integer and int() alone would drop one.
    return "{:.1f}%\n({:d})".format(pct, int(round(pct / 100 * total_count)))


plt.rcParams['font.family'] = ['SimHei']  # font used for the Chinese labels and title
plt.figure(figsize=(6, 6), dpi=100)
plt.pie(
    quality_list_sum['sum'],
    wedgeprops={'width': 0.6},  # a wedge width below the radius leaves a hole in the middle
    autopct=_wedge_label,
    labels=quality_list_sum['Quality_grade'],
)
plt.title('2018年北上广深全年空气质量情况环形图\nGiesen', size=16, pad=12)
plt.legend(loc=1, bbox_to_anchor=(1.2, 1))
plt.show()

9. 2018年广州PM2.5指数日历图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import pandas as pd
import datetime
from pyecharts import options as opts
from pyecharts.charts import Calendar
# Calendar heat map built from the 'Date' and 'PM' columns of the Guangzhou CSV.
data = pd.read_csv('guangzhou_AQI_2018.csv')


def _iso_day(date_text):
    """Convert a '/'-separated year/month/day string to 'YYYY-MM-DD'."""
    parts = date_text.split('/')
    return datetime.date(int(parts[0]), int(parts[1]), int(parts[2])).isoformat()


# One [iso_date, value] pair per row.
pm_data = [
    [_iso_day(date_text), int(pm_value)]
    for date_text, pm_value in zip(list(data['Date']), list(data['PM']))
]

title = opts.TitleOpts(
    title='2018年广州PM2.5指数日历图\t\t(Giesen)',
    pos_left='center',
    title_textstyle_opts=opts.TextStyleOpts(font_size=15),
)
colour_scale = opts.VisualMapOpts(
    max_=300,
    orient='horizontal',  # lay the visualMap component out horizontally
    is_piecewise=True,
    split_number=6,  # split the value range into 6 pieces automatically
    pos_bottom="6%",
    pos_left="center",
)

c = Calendar(init_opts=opts.InitOpts(height='260px'))
c.add("", pm_data, calendar_opts=opts.CalendarOpts(range_="2018"))
c.set_global_opts(title_opts=title, visualmap_opts=colour_scale)
c.render_notebook()

10. 2018年北上广深全年空气质量情况多饼图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Pie

# Four ring charts on one canvas, one per city, each showing that city's
# row counts per 'Quality_grade'.
filename = [
    'beijing_AQI_2018.csv',
    'shanghai_AQI_2018.csv',
    'guangzhou_AQI_2018.csv',
    'shenzhen_AQI_2018.csv',
]

quality_grade_count_list = []
for csv_path in filename:
    data = pd.read_csv(csv_path)
    grade_counts = (
        data.groupby(['Quality_grade'])['Quality_grade']
        .agg(['count'])
        .reset_index()
        .sort_values(by='count', ascending=False)
    )
    quality_grade_count_list.append(grade_counts)

# Series name and centre position of each ring, in the same order as `filename`.
ring_layout = [
    ('北京', ['30%', '30%']),
    ('上海', ['60%', '30%']),
    ('广州', ['30%', '75%']),
    ('深圳', ['60%', '75%']),
]

p = Pie()
for (city_name, centre), grade_counts in zip(ring_layout, quality_grade_count_list):
    # The pie series takes [name, value] pairs.
    grade_pairs = [
        [grade, occurrences]
        for grade, occurrences in zip(grade_counts['Quality_grade'], grade_counts['count'])
    ]
    p.add(city_name, grade_pairs, center=centre, radius=['20%', '40%'])
p.set_global_opts(
    title_opts=opts.TitleOpts(title='2018年北上广深全年空气质量情况\n\t\tGiesen', pos_left='30%'),
    legend_opts=opts.LegendOpts(orient="vertical", pos_right='10%', pos_top='10%'),
)
# The '{a}' label puts the series (city) name in the middle of each ring;
# the tooltip shows series name, grade, count and percentage.
p.set_series_opts(
    label_opts=opts.LabelOpts(formatter='{a}', position='center', font_size=20),
    tooltip_opts=opts.TooltipOpts(formatter='{a}<br/>{b}:{c}({d}%)'),
)
p.render_notebook()

二、尾巴