可以看到这是四个数据集,按照要求依次计算,最后绘制散点图。
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import statsmodels.api as sm
#import statsmodels.formula.api as smf

# Anscombe's quartet exercise: for every dataset in 'anscombe.csv' print the
# mean/variance of x and y (Part 1.1), the x-y correlation coefficient
# (Part 1.2) and the least-squares regression line (Part 1.3), then draw one
# scatter plot per dataset.

anascombe = pd.read_csv('anscombe.csv')
print(anascombe)

# Split the rows by the `dataset` column instead of hard-coded positional
# slices. The earlier slices ([:10], [11:21], [22:32], [33:43]) were
# end-exclusive, so they took only 10 rows out of every 11-row stride and
# skipped rows 10, 21, 32 and 43. sort=False keeps the groups in the order
# they first appear in the file, so they are numbered 1, 2, ... in file order.
datasets = [
    (number, group["x"].values, group["y"].values)
    for number, (_, group) in enumerate(
        anascombe.groupby("dataset", sort=False), start=1
    )
]

print("\n")
print("Part 1.1")
for number, x, y in datasets:
    # np.var is called with its default ddof=0, i.e. the population variance.
    print("mean of x{} : ".format(number), np.mean(x))
    print("mean of y{} : ".format(number), np.mean(y))
    print("variance of x{} : ".format(number), np.var(x))
    print("variance of y{} : ".format(number), np.var(y))

print("\n")
print("Part 1.2")
for number, x, y in datasets:
    # corrcoef returns the 2x2 correlation matrix; the off-diagonal entry
    # [0][1] is the correlation between x and y.
    coeff = np.corrcoef(x, y)
    print(
        "correlation coefficient between x{0} and y{0} is ".format(number),
        coeff[0][1],
    )

print("\n")
print("Part 1.3")
for number, x, y in datasets:
    # add_constant prepends a column of ones, so params[0] is the intercept
    # and params[1] is the slope of the fitted line.
    params = sm.OLS(y, sm.add_constant(x)).fit().params
    print(
        "Dataset{0}: y{0} =".format(number),
        params[0],
        "+",
        params[1],
        "* x{}".format(number),
    )
print("\n")

# One scatter panel per value of the `dataset` column.
g = sns.FacetGrid(anascombe, col="dataset")
g.map(plt.scatter, "x", "y")
plt.show()