基于python的数据分解-趋势-季节性-波动变化

时间:2024-07-07 06:59:06
import statsmodels.formula.api as smf #表示线性拟合计算趋势 df = np.loadtxt("elec_prod.txt") t = np.arange(1,397) df_t = np.vstack((df,t)).swapaxes(0,1).astype(int) model_data = pd.DataFrame(df_t,columns=['df','t']) results_f = smf.ols('df~t',data=model_data).fit() print(results_f.summary().tables[1]) print('std = ',np.std(results_f.resid)) fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) ax.plot(model_data, linestyle="-", color='red') ax.plot(t,1.423e+05 + 499.2576*t, color='blue') ax.set_ylim((130000, 410000)) ax.set_ylabel(ylabel="Electricity", fontsize=17) ax.set_xlabel(xlabel="Time", fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_2.png') ##%表示曲线拟合计算趋势 df = pd.read_excel('ningxiaGDP.xlsx').rename(columns={'t':'t1'}) df = pd.DataFrame(df['t1'].values**2,columns=['t2']).join(df) results_f = smf.ols('gdp~ 0 + t1+ t2', data=df).fit() print(results_f.summary().tables[1]) print('std = ', np.std(results_f.resid)) from scipy.optimize import curve_fit df = pd.read_excel('ningxiaGDP.xlsx') t = df['t'].values gdp = df['gdp'].values def func(x, b,c):    return b*x + c*x**2 popt, pcov = curve_fit(func,t,gdp,p0=(1.0,1.0)) print(popt) b = popt[0] c = popt[1] residuals = gdp - func(t, b, c) print(np.std(residuals)) t = np.arange(1996, 2016) fig = plt.figure(figsize=(12,4),dpi=150) ax = fig.add_subplot(111) ax.scatter(y=gdp, x=t, color='blue') ax.plot(t, results_f.predict()) ax.xaxis.set_major_locator(ticker.MultipleLocator(3)) ax.set_ylabel(ylabel="宁夏地区生产总值",fontsize=17) ax.set_xlabel(xlabel="时间", fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_3.png') ##%表示5期移动平均拟合,移动平均法计算趋势 from statsmodels.tsa.seasonal import seasonal_decompose nile_ar = np.loadtxt("Nile.txt"); Date = np.arange(1871, 1971) nile_df = pd.DataFrame({"Date":Date, "Nile":nile_ar}) nile_df.index = nile_df["Date"] nile_df['5-period Moving Avg'] = nile_df['Nile'].rolling(5).mean() fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) nile_df['Nile'].plot(ax=ax, color='b', marker="o", linestyle='--') nile_df['5-period Moving Avg'].plot(ax=ax, color='r') ax.legend(loc=1,labels=['Index','Moving average'], fontsize=13) ax.set_ylabel(ylabel="Index", fontsize=17) ax.set_xlabel(xlabel="Time", fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_4.png') #%%二次移动平均法计算趋势 gdp_df = pd.read_csv('JDGDP.csv') gdp_df['Moving_Avg_1'] = gdp_df['JDGDP'].rolling(4).mean() gdp_df['Moving_Avg_2'] = gdp_df['Moving_Avg_1'].rolling(4).mean() gdp_df['at'] = 2*gdp_df['Moving_Avg_1'] - gdp_df['Moving_Avg_2'] fig = plt.figure(figsize=(12,4),dpi=150) ax = fig.add_subplot(111) gdp_df['JDGDP'].plot(ax=ax, color='b',marker="o",linestyle='--') gdp_df['at'].plot(ax=ax, color='r') ax.xaxis.set_major_locator(ticker.MultipleLocator(3)) ax.legend(loc=2,labels=['季度GDP','两次移动平均'], fontsize=13) ax.set_ylabel(ylabel="中国季度国内生产总值", fontsize=17) ax.set_xlabel(xlabel="时间", fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_5.png') ##%指数平滑法,考虑近期变化对现在影响较大,而远期的变化对现在影响要小一些 from statsmodels.tsa.api import SimpleExpSmoothing df = np.loadtxt("retail_price_index.txt") index = pd.date_range(start="1990", end="2021", freq="A") retail_df = pd.Series(df, index) fit1 = SimpleExpSmoothing(retail_df, initialization_method="heuristic").fit(smoothing_level=0.2, optimized=False) fcast1 = fit1.forecast(3).rename(r"α=0.2") fit2 = SimpleExpSmoothing(retail_df, initialization_method="heuristic").fit(smoothing_level=0.6, optimized=False) fcast2 = fit2.forecast(3).rename(r"α=0.6") fit3 = SimpleExpSmoothing(retail_df, initialization_method="estimated").fit() fcast3 = fit3.forecast(3).rename(r"α= "% fit3.model.params["smoothing_level"] ) fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) ax.plot(retail_df, marker="o", color="black") ax.plot(fit1.fittedvalues, marker="8", color="green",linestyle="-.") (line1,) = ax.plot(fcast1, marker="8", color="green",linestyle="-.") ax.plot(fit2.fittedvalues, marker="s", color="red",linestyle=":") (line2,) = ax.plot(fcast2, marker="s", color="red",linestyle=":") ax.plot(fit3.fittedvalues, marker="p", color="blue",linestyle="--") (line3,) = ax.plot(fcast3, marker="p", color="blue",linestyle="--") plt.legend([line1, line2, line3], [fcast1.name, fcast2.name, fcast3.name], fontsize=15) ax.set_ylabel(ylabel="商品零售价格指数", fontsize=17) ax.set_xlabel(xlabel="时间", fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_6.png') ##%对于含有季节变动部分的时间序列为Holt-Winters指数平滑法 from statsmodels.tsa.api import ExponentialSmoothing df = np.loadtxt("QGDP.txt") index = pd.date_range(start="2000", end="2021", freq="Q") QGDP_df = pd.Series(df, index) fit = ExponentialSmoothing(QGDP_df, seasonal_periods=4, trend="add", seasonal="mul",initialization_method="estimated").fit() simulations = fit.simulate(8, repetitions=1000, error="mul") fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) ax.plot(QGDP_df, marker="o", color="black") ax.plot(fit.fittedvalues, marker="o", color="blue", linestyle=":") ax.plot(simulations, marker="o", color="blue", linestyle=":") ax.set_ylabel(ylabel="国内季度生产总值累计值", fontsize=17) ax.set_xlabel(xlabel="时间",fontsize=17) plt.xticks(fontsize=15); plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_8.png') ##%季节效应,季节指数,用简单平均法计算的周期内各时期季节性影响的相对数。 df = np.loadtxt("tempdub.txt") index = pd.date_range(start="1964", end="1976", freq="M") tempdub_df = pd.Series(df, index) fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) ax.plot(tempdub_df, marker="o", color="blue") ax.set_ylabel(ylabel="杜比克市月平均气温",fontsize=17) ax.set_xlabel(xlabel="时间",fontsize=17) plt.xticks(fontsize=15);plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_9.png') t = np.arange(1,13) SI = np.array([0.36,0.45,0.7,1.00,1.26,1.46,1.55,1.50,1.32,1.10,0.79,0.51]) Season_index = pd.Series(SI, t) fig = plt.figure(figsize=(12,4), dpi=150) ax = fig.add_subplot(111) ax.plot(Season_index, marker="o", color="blue") ax.set_ylabel(ylabel="季节指数",fontsize=17) ax.set_xlabel(xlabel="时间",fontsize=17) plt.xticks(fontsize=15);plt.yticks(fontsize=15) fig.tight_layout(); plt.savefig(fname='fig/4_10.png')