本项目利用约五年的全球地震数据，通过 Python 的 pandas、matplotlib、basemap 等库进行数据可视化，绘制出地震散点图。主要代码如下所示：
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
from __future__ import division

import numpy as np
import pandas as pd
from matplotlib.patches import Polygon
from pandas import DataFrame, Series
# Province-level region names used to recognise place strings located in China.
# NOTE(review): the '*' entries look like placeholders for names that were
# removed from the published listing -- confirm against the original data.
chi_provinces = [
    '北京', '天津', '上海', '重庆',
    '河北', '山西', '辽宁', '吉林',
    '黑龙江', '江苏', '浙江', '安徽',
    '福建', '江西', '山东', '河南',
    '湖北', '湖南', '广东', '海南',
    '四川', '贵州', '云南', '陕西',
    '甘肃', '青海', '*', '内蒙古',
    '广西', '*', '宁夏', '*',
    '香港', '澳门',
]


def is_in_china(str):
    """Return True when a place name starts with an entry of ``chi_provinces``.

    Args:
        str: Place description to test. The parameter name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        bool: True if the first two or the first three characters of the
        argument are an entry of ``chi_provinces``; False otherwise.
    """
    # Most entries are two characters long, but '黑龙江' and '内蒙古' have
    # three, so a two-character prefix alone can never match them.
    return str[:2] in chi_provinces or str[:3] in chi_provinces
def convert_data_2014(x):
    """Convert a cell to ``float`` when it is a numeric string.

    Args:
        x: Cell value; a string (possibly padded with whitespace) or any
            other object.

    Returns:
        ``float(x.strip())`` when that succeeds. ``x`` is returned unchanged
        when it has no ``strip`` method (``AttributeError``) or when the
        stripped text is not a valid float (``ValueError``).
    """
    try:
        return float(x.strip())
    except (ValueError, AttributeError):
        # Best-effort conversion: leave anything unparsable as it was.
        return x
def format_lat_lon(x):
    """Divide a coordinate value by 100, mapping unusable values to NaN.

    Args:
        x: Value to scale.

    Returns:
        ``x / 100``, or ``np.nan`` when the division raises ``TypeError``
        (for example when ``x`` is a string or ``None``).
    """
    try:
        return x / 100
    except TypeError:
        # NaN marks the cell as missing so the row can be dropped later.
        return np.nan
# Load the yearly earthquake spreadsheets, clean them and aggregate per place.
#
# NOTE(review): this listing appears to have been lower-cased when it was
# published; the column names and the 'ml' suffix below are kept exactly as
# found and should be verified against the real spreadsheets.

# pd.concat replaces the chained DataFrame.append calls (append was removed
# in pandas 2.0); the files are stacked in the same order as before.
df = pd.concat(
    [
        pd.read_excel(r'c:/users/ggws/desktop/shuju/201601-12.xls'),
        pd.read_excel(r'c:/users/ggws/desktop/shuju/201201-12.xls'),
        pd.read_excel(r'c:/users/ggws/desktop/shuju/shuju.xls'),
        pd.read_excel(r'c:/users/ggws/desktop/shuju/201501-12.xls'),
    ],
    ignore_index=True,
)
# The 2014 file is read on its own because, per the original author, the
# format and data type of some of its columns differ from the other files.
df_2014 = pd.read_excel(r'c:/users/ggws/desktop/shuju/201401-12.xls')

# Turn numeric strings into floats; other values pass through unchanged.
df['longitude'] = df['longitude'].apply(convert_data_2014)
df['latitude'] = df['latitude'].apply(convert_data_2014)
df_2014['longitude'] = df_2014['longitude'].apply(convert_data_2014)
df_2014['latitude'] = df_2014['latitude'].apply(convert_data_2014)
df = pd.concat([df, df_2014], ignore_index=True)

# Keep only the five columns used below; copy so the assignments that follow
# write to an independent frame rather than a slice of the concatenated one.
df = df[['latitude', 'longitude', 'magnitude', 'referenced place', 'time']].copy()

# Scale the coordinates by 1/100; values that cannot be divided become NaN.
df['longitude'] = df['longitude'].apply(format_lat_lon)
df['latitude'] = df['latitude'].apply(format_lat_lon)
# Drop every row that has a NaN in any of the kept columns.
df = df.dropna(axis=0, how='any')


def format_magnitude(x):
    """Return ``x`` as a float after stripping leading/trailing 'm' and 'l'."""
    return float(str(x).strip('ml'))


df['magnitude'] = df['magnitude'].apply(format_magnitude)
# df = df[df['referenced place'].apply(is_in_china)]

# Aggregate per referenced place: mean longitude, mean latitude, event count.
grouped = df.groupby('referenced place')
lon_mean = grouped['longitude'].mean()
lat_mean = grouped['latitude'].mean()
group_counts = grouped['magnitude'].count()
after_agg_data = pd.concat([lon_mean, lat_mean, group_counts], axis=1)
after_agg_data = after_agg_data.rename(columns={'magnitude': 'counts'})

# Order places by event count, busiest first, and renumber the rows 0..n-1
# (this replaces the place-name index with positions).
after_sorted_data = after_agg_data.sort_values(by='counts', ascending=False)
new_index = np.arange(len(after_sorted_data.index))
after_sorted_data.index = new_index

# Keep the places whose count is at least the count of the row at position 80
# (ties included). The position is clamped so fewer than 81 places, or none
# at all, no longer raises.
if after_sorted_data.empty:
    paint_data = after_sorted_data
else:
    cutoff_pos = min(80, len(after_sorted_data) - 1)
    cutoff_count = after_sorted_data['counts'].iloc[cutoff_pos]
    paint_data = after_sorted_data[after_sorted_data['counts'] >= cutoff_count]
# Draw every earthquake in ``df`` as a small red dot on a world map.
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

plt.figure(figsize=(16, 8))
m = Basemap()
# Overlay the boundaries stored in the shapefile; its shapes are exposed as
# ``m.states``.
m.readshapefile(r'c:/users/ggws/desktop/jb/gadm36_chn_1', 'states', drawbounds=True)
ax = plt.gca()
# Disabled alternative: fill every shapefile polygon in red.
# for nshape, seg in enumerate(m.states):
#     poly = Polygon(seg, facecolor='r')
#     ax.add_patch(poly)
m.drawcoastlines(linewidth=0.5)
m.drawcountries(linewidth=0.5)
m.shadedrelief()

# Project all coordinates at once and draw them with a single plot call
# instead of one call per row; the 'ro' format draws markers only, so the
# points are not joined by a line. Columns are addressed by name rather than
# by position within the row.
x, y = m(df['longitude'].values, df['latitude'].values)
m.plot(x, y, 'ro', markersize=0.5)

# Disabled alternative: draw the 80 most active places as white circles whose
# size is proportional to their event count relative to the busiest place.
# for indexs in after_sorted_data.index[:80]:
#     lon, lat = after_sorted_data.loc[indexs].values[0], after_sorted_data.loc[indexs].values[1]
#     x, y = m(lon, lat)
#     m.plot(x, y, 'wo', markersize=10 * (after_sorted_data.loc[indexs].values[2] / after_sorted_data.loc[0].values[2]))

plt.title("worldwide earthquake")
plt.show()
|
效果如下
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/qq_36228216/article/details/86680246