pandas与sqlalchemy交互实现科学计算

时间:2023-03-09 15:46:10
pandas与sqlalchemy交互实现科学计算
 
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
#建立数据库引擎
engine = create_engine('mysql+pymysql://root:mysql@127.0.0.1:3306/mymac')
sql = 'select * from student'
#建立dataframe对象
df = pd.read_sql_query(sql,engine)
print(df)
#按照年龄正序
# select * from student order by age asc |desc
df = df.sort_values(['age','height'],ascending=False)
print(df)
#取年龄最小的学生
# select * from student order by age limit 1
dd = df.sort_values(['age']).head(1)
print(dd)
#求整个班的平均年龄 使用loc方法指定字段
#select * from student where gender = 0 and age < (select avg(age) from student gender=0)
avg_age = df.loc[df['gender'] == 0].age.mean()
print(df.loc[(df['age']< avg_age) & (df['gender'] == 0)].head(1))
#全班的平均年龄
print(df.age.mean())
#全班的年龄求和
print(df.age.sum())
#取男生的年龄求和
print(df.loc[df['gender'] == 1].age.sum())
#获取指定字段
print(df.loc[ df['gender'] == 1,['id','name']])
#取全部年龄最小 min 最大max
print(df.age.min())
#或者 |
print(df.loc[(df['gender']==0) | (df['gender'] == 1)])
#全班多少人
print(df.id.count())
#排除法
print(df.loc[(df['gender'] != 0) & (df['gender'] != 1)])
#修改操作
df.loc[1,'gender'] = 1
df.loc[4,'age'] = 19
print(df)
#取单值
print(df.loc[2,'name']) # 将datafram写入数据表 表名,数据引擎,数据是否入库 (不建议使用,因为键的表字段类型约束等需调整)
# 需安装openpyxl库
# df.to_sql('student_copy',engine,index=False)
#到出excel文件
# df.to_excel('student.xlsx',index=False)