import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Load the dataset from the local CSV file.
data_path = r'D:\机器学习\数据集:国家划分的生活成本\Cost_of_Living_Index_by_Country_2024.csv'
df = pd.read_csv(data_path)
# Show the first few rows as a quick sanity check of the parsed columns.
print(df.head())
# Count missing values per column.
print(df.isnull().sum())
# Print basic summary statistics.
print(df.describe())
# Visualization
# Univariate analysis: draw one histogram per numeric column of df.
df.hist(bins=20, figsize=(12, 10), color='blue')
plt.tight_layout()  # adjust subplot parameters so the subplots fill the figure area
plt.show()
# Multivariate analysis: pairwise correlation between the numeric columns.
numeric_df = df.select_dtypes(include=['float64', 'int64'])
corr_matrix = numeric_df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
# Optional: save the figure. Keep this before plt.show(); once the window
# shown by plt.show() is closed, the saved image may come out blank.
# plt.savefig('8.11Cost_of_Living_Index_by_Country_2024.png')
plt.show()
# Bar chart: the ten countries with the highest Cost of Living Index.
# nlargest selects by value, so the result does not depend on the row order
# of the CSV file.
top_10_countries = df.nlargest(10, 'Cost of Living Index')
plt.figure(figsize=(12, 6))
sns.barplot(x='Country', y='Cost of Living Index', data=top_10_countries)
plt.xticks(rotation=90)  # rotate the x-axis labels so country names do not overlap
plt.title('Top 10 Countries by Cost of Living Index')
plt.show()
# Scatter plot: relationship between Cost of Living Index and Rent Index.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Cost of Living Index', y='Rent Index', data=df)
# Add the title; axis labels are taken from the column names by seaborn.
plt.title('Cost of Living Index vs Rent Index')
plt.show()
# Heatmap: correlation between all numeric indicators.
plt.figure(figsize=(10, 6))
numeric1_df = df.select_dtypes(include=['float64', 'int64'])
sns.heatmap(numeric1_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()