import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#ADF单位根检验
from statsmodels.tsa.stattools import adfuller
#序列分解(季节+趋势+残差)
from statsmodels.tsa.seasonal import seasonal_decompose
#ACF/PACF
from statsmodels.tsa.stattools import acf, pacf
#绘制ACF,PACF图
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
#建立ARIMA(p,d,q)
from statsmodels.tsa.arima_model import ARIMA
# Load the passenger data from CSV, using the 'time' column as the index,
# then convert the index labels to datetimes.
ts = pd.read_csv(
    'D:/passenger.csv',
    index_col='time',
    encoding='utf-8',
)
ts.index = pd.to_datetime(ts.index)

# Usage note (from the original author):
#   ts['1960']   --- outputs the records for 1960 (January to December)
#   ts['1960-8'] --- outputs the records for August 1960
# NOTE(review): recent pandas releases may require ts.loc['1960'] for this
# kind of row selection on a DataFrame -- confirm against the installed
# version.

# Time-series plot (an upward trend means the series is non-stationary).
plt.plot(ts, color='red')
plt.show()
# ADF unit-root test (a p-value > 0.05 means the series is non-stationary).
# The second argument of adfuller is maxlag, so the lag search performed
# under autolag='AIC' is capped at 1 here.
dftest = adfuller(ts['num'], maxlag=1, autolag='AIC')
# The first four entries of the result are labelled below as the test
# statistic, p-value, number of lags used and number of observations used.
dfoutput = pd.Series(
    dftest[0:4],
    index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
)
# dftest[4] is a mapping of critical values; append each one to the summary
# under its own label.
for key, value in dftest[4].items():
    dfoutput['Critical Value (%s)' % key] = value
print(dfoutput)
# Autocorrelation (ACF) and partial autocorrelation (PACF) plots, stacked in
# one figure: ACF in the first row, PACF in the second, each with lags=20.
fig, ax = plt.subplots(2, 1, figsize=(6, 12))
plot_acf(ts, lags=20, ax=ax[0])
plot_pacf(ts, lags=20, ax=ax[1])
# Display the figure explicitly, matching the plt.show() used after the
# time-series plot; otherwise it may never appear when the file is run
# non-interactively.
plt.show()
# Decompose the series into trend, seasonal and residual components.
decomposition = seasonal_decompose(ts)
trend = decomposition.trend
season = decomposition.seasonal
residual = decomposition.resid

# Plot the original series and the three components, one per row, each with
# a single-entry legend naming the panel.
fig, ax = plt.subplots(4, 1, figsize=(10, 16))
panels = [
    (ts, 'origin'),
    (trend, 'trend'),
    (season, 'seasonal'),
    (residual, 'residuals'),
]
for axis, (component, label) in zip(ax, panels):
    axis.plot(component)
    axis.legend([label])
# Display the figure explicitly, matching the plt.show() used after the
# time-series plot; otherwise it may never appear when the file is run
# non-interactively.
plt.show()
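# NOTE: the preliminary time-series analysis above works fine, but the ARIMA
# modeling step raises an error; this issue is still unresolved.
# NOTE(review): the ARIMA class in statsmodels.tsa.arima_model was removed in
# statsmodels 0.13 in favor of statsmodels.tsa.arima.model.ARIMA -- if that is
# the cause of the error, switch the import; confirm against the installed
# version.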