代码:
# Anscombe's quartet: summary statistics, per-dataset linear fits, and plots.
#
# NOTE(review): the original first token was "#matplotlib inline", presumably
# the Jupyter magic `%matplotlib inline`. When running inside a notebook,
# put that magic in the first cell; it is not valid syntax in a plain script.
import random
import statistics as sta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
# The public namespace replaces the deprecated private `scipy.stats.stats`.
import scipy.stats as stats
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Several of the imports above are unused below; they are kept because
# other cells/sections of the same notebook may rely on them.

# Long-format frame with columns "dataset", "x" and "y".
anascombe = sns.load_dataset("anscombe")

# Group once and reuse the grouping for every per-dataset computation.
by_dataset = anascombe.groupby("dataset")


def _fit_line(dataset):
    """Fit the OLS model ``y ~ x`` on the rows of one dataset and return it."""
    return smf.ols("y ~ x", by_dataset.get_group(dataset)).fit()


# ---- Part 1: summary statistics ------------------------------------------

# Show the means.
print("the mean of x and y are:")
# Columns are selected with a list: tuple selection on a GroupBy
# (``['x', 'y']``) raises in pandas >= 2.0.
print(by_dataset[["x", "y"]].mean())
print("\n")

# Show the variances.
print("the variance of x and y are:")
print(by_dataset[["x", "y"]].var())
print("\n")

print("the correlation coefficient between x and y are:")
print(by_dataset.corr())
print("\n")

print("the first linear regression line:")
lin_model_1 = _fit_line("I")
print(lin_model_1.params)
print("\n")

print("the second linear regression line:")
lin_model_2 = _fit_line("II")
print(lin_model_2.params)
print("\n")

print("the third linear regression line:")
lin_model_3 = _fit_line("III")
print(lin_model_3.params)
print("\n")

print("the fourth linear regression line:")
lin_model_4 = _fit_line("IV")
print(lin_model_4.params)

# ---- Part 2: data visualisation ------------------------------------------

sns.set(style="whitegrid")
# One scatter panel per dataset, laid out in columns.
g = sns.FacetGrid(anascombe, col="dataset")
g.map(plt.scatter, "x", "y")
plt.show()
part 1运行结果:
part 2运行结果: