文章目录
- 前言
- 一、mysql基本语法回顾
- 1.建库、建表、修改结构
- on、group by、order by、聚合函数
- 二、mysql进阶
- 1.窗口函数over()用法
- 2.窗口函数over(partition by)用法
- 3.排序函数rank() dense_rank() row_number()
- 4.自定义窗口函数rows range
- 5.分析函数LEAD,LAG,FIRST_VALUE,LAST_VALUE,NTH_VALUE
- 总结
前言
mysql学习进阶
一、mysql基本语法回顾
1.建库、建表、修改结构
# 创建一个数据库:db_student,如果数据库已存在不能报错
create database if not exists db_student character set "UTF8";
# b) 根据图中两张表的定义,在db_student中创建两张表
use db_student;
create table if not exists Student
(
Id int(10) primary key auto_increment,
Name varchar(20) not null,
Sex varchar(4),
Birth YEAR,
Department varchar(20) not null,
Address varchar(50)
);
create table if not exists Score
(
Id int primary key auto_increment,
Stu_id int not null ,
C_name varchar(20),
Grade int
);
# c) 查询student表结构
desc Student;
# d) 修改student表中的字段名Sex为:gender
alter table Student change Sex gender varchar(4);
# e) 修改Score表,新增一个remark字段,类型为varchar(500)
alter table Score add remark varchar(500);
on、group by、order by、聚合函数
create database if not EXISTS db_student;
# 2、根据图中两张表的定义,在db_student中创建两张表
create table if not exists Student
(
Id int primary key auto_increment,
Name VARCHAR(20) not null,
Sex VARCHAR(4),
Birth YEAR,
Department VARCHAR(20) not null,
Address VARCHAR(50)
);
create table if not exists Score
(
Id int primary key auto_increment,
Stu_id int not NULL,
C_name VARCHAR(20),
Grade int
);
# 3、将以下信息插入student表
INSERT into Student
VALUES (901, '张老大', '男', '1985', '计算机系', '北京市海定区');
INSERT into Student
VALUES (902, '张老二', '男', '1986', '中文系', '北京市昌平区');
INSERT into Student
VALUES (903, '张三', '女', '1990', '中文系', '湖南省永州市');
INSERT into Student
VALUES (904, '李四', '男', '1990', '英语系', '辽宁省阜新市');
INSERT into Student
VALUES (905, '王五', '女', '1991', '英语系', '福建省厦门市');
INSERT into Student
VALUES (906, '王六', '男', '1988', '计算机系', '湖南省衡阳市');
# 4、将以下信息插入score表
INSERT INTO Score(Stu_id, C_name, Grade)
VALUES (901, '计算机', 98);
INSERT INTO Score(Stu_id, C_name, Grade)
VALUES (901, '英语', 80);
INSERT INTO Score(Stu_id, C_name, Grade)
VALUES (902, '计算机', 65);
INSERT INTO Score(Stu_id, C_name, Grade)
VALUES (902, '中文', 88);
INSERT INTO Score(Stu_id, C_name, Grade)
values (903, '中文', 95);
INSERT INTO Score(Stu_id, C_name, Grade)
values (904, '计算机', 70);
INSERT INTO Score(Stu_id, C_name, Grade)
values (904, '英语', 92);
INSERT INTO Score(Stu_id, C_name, Grade)
values (905, '英语', 94);
INSERT INTO Score(Stu_id, C_name, Grade)
values (906, '计算机', 90);
INSERT INTO Score(Stu_id, C_name, Grade)
values (906, '英语', 85);
# 5、从student表中查询每个院系有多少人
SELECT Department, count(*) as '人数'
from Student
GROUP BY Department;
# 6、从score表中查询每个科目的最高分
SELECT DISTINCT C_name, MAX(Grade) as '最高分'
from Score
GROUP BY C_name;
# 7、从score表中计算每个考试科目的平均分
SELECT C_name, ROUND(avg(grade), 2) as '最高分'
from Score
GROUP BY C_name;
# 8、从score表中将计算机考试成绩按从高到低进行排序
SELECT Grade, C_name, Stu_id
from Score
where C_name = '计算机'
ORDER BY Grade DESC;
# 2、请根据以上表的定义,在db_emp中创建emp和dept表,salgrade表
create database if not exists db_emp;
use db_emp;
create table if not exists emp
(
empno int PRIMARY key,
empname VARCHAR(10) not null,
job VARCHAR(10) not null,
manager int,
hiredate date,
salary double,
comm double,
deptno int
);
create table dept
(
deptno int primary key,
dname VARCHAR(20) not null,
loc VARCHAR(20)
);
create table salgrade
(
grade int PRIMARY key,
losal DOUBLE,
hisal DOUBLE
);
# 3、请将以下数据插入emp表
insert into emp VALUES(369,'SMITH','CLERK',7902,'1980-12-17',800,null,20);
insert into emp VALUES(7499,'AKKEN','SALESMAN',7698,'1981-02-20',1600,300,20);
insert into emp VALUES(7521,'WARD','SALESMAN',7698,'1981-02-22',1250,500,30);
insert into emp VALUES(7566,'JONES','MANAGER',7839,'1981-04-02',2975,null,20);
insert into emp VALUES(7654,'MARTIN','SALESMAN',7698,'1981-09-28',1250,1400,30);
insert into emp VALUES(7698,'BLAKE','MANAGER',7839,'1981-05-01',2850,null,30);
insert into emp VALUES(7782,'CLARK','MANAGER',7839,'1981-06-09',2450,null,10);
insert into emp VALUES(7788,'SCOTT','ANALYST',7566,'1987-07-03',3000,null,20);
insert into emp VALUES(7839,'KING','PRESIDENT',null,'1981-11-17',5000,null,10);
insert into emp VALUES(7844,'TURNER','SALESMAN',7698,'1981-09-08',1500,null,30);
insert into emp VALUES(7876,'ADAMS','CLERK',7788,'1987-07-13',1100,null,20);
insert into emp VALUES(7900,'JAMES','CLERK',7698,'1981-12-03',950,null,30);
insert into emp VALUES(7902,'FORD','ANALYST',7566,'1981-12-03',3000,null,20);
insert into emp VALUES(7934,'MILLER','CLERK',7782,'1981-01-23',1300,null,10);
# 4、请将以下数据插入dept表
insert into dept VALUES(10,'ACCOUNTING','NEW YORK');
insert into dept VALUES(20,'RESEARCH','DALLAS');
insert into dept VALUES(30,'SALES','CHICAGO');
insert into dept VALUES(40,'OPERATIONS','BOSTON');
# 5、请将以下数据插入salgrade表
INSERT into salgrade VALUES(1,700,1200);
INSERT into salgrade VALUES(2,1201,1400);
INSERT into salgrade VALUES(3,1401,2000);
INSERT into salgrade VALUES(4,2001,3000);
INSERT into salgrade VALUES(5,3001,9999);
# 5、查询名字长度为6个字符的员工信息(查阅资料)
select * from emp where LENGTH(empname) =6;
# 6、查询员工的信息,并按工作降序排序,如果工作相同按照工资升序排序
SELECT * from emp ORDER BY job desc,salary ASC;
# 7、查询每个工作岗位的员工人数并按照人数降序排序
SELECT job,count(*) as 'numb' from emp GROUP BY job;
# 8、查询每个工作岗位的最高薪资
SELECT job,max(salary) as maxsal from emp GROUP BY job;
# 9、查询所有员工的信息及所在部门名称
select empno,empname,job,manager,hiredate,salary,comm,a.deptno,dname from emp a,dept b where a.deptno=b.deptno;
# 10、查询销售部(SALES)所有员工的姓名
SELECT empname,dname from emp a,dept b where b.dname='SALES' and a.deptno = b.deptno;
# 11、查询工资高于平均工资的员工信息(查阅资料,学习子查询)
SELECT * from emp where salary>(SELECT AVG(salary) from emp);
# 12、查询每个部门的最高薪资、最低薪资、平均薪资并按照最高薪资降序排序
select deptno,max(salary),min(salary),avg(salary) from emp GROUP BY deptno ORDER BY max(salary) desc;
# 13、列出所有员工的姓名及其直接上级的姓名(自关联 将一张表当做多张表使用)
SELECT a.empname,b.empname as '上级' from emp a left JOIN emp b on a.manager = b.empno;
二、mysql进阶
1.窗口函数over()用法
OVER (…) 的最基本用法: OVER() 意思是所有的数据都在窗口中
SELECT
first_name,
last_name,
salary,
AVG(salary) OVER()
FROM employee;
AVG(salary) 意思是要计算平均工资,加上 OVER() 意味着对全部数据进行计算,所以就是在计算所有人的平均工资。
需要注意的是,我们没有使用 GROUP BY 进行分组,这样在查询结果中除了聚合函数的结果之外,我们还可以显示其他数据如果使用 GROUP BY 想将聚合结果与原始数据放到一个结果中,需要使用子查询,效率相对低
# 需求:创建报表,除了查询每个人的工资之外,还要统计出公司每月的工资支出
select
first_name,
last_name,
salary,
sum(salary) over()
from employee;
# 需求:统计采购表中的平均采购价格,并与明细一起显示(每件物品名称,价格)
select
item,
price,
avg(price) over()
from employee;
# OVER() 用于将当前行与一个聚合值进行比较,例如,我们可以计算出员工的薪水和平均薪水之间的差
# 窗口函数的典型应用场景:将当前行与一组数据的聚合值进行比较
select
first_name,
last_name,
salary,
avg(salary) over(),
salary - avg(salary) over() as difference
from employee;
# 需求:创建报表统计每个员工的工龄和平均工龄之间的差值报表中包含如下字段:
# 员工的名字,员工的姓氏,员工的工龄,所有员工的平均工龄,员工工龄和平均工龄之间的差值
select
first_name,
last_name,
years_worked,
avg(years_worked) over(),
years_worked - avg(years_worked) over() as years_difference
from employee;
# over()和count()一起使用
SELECT
id,
name,
COUNT(id) OVER()
FROM department
ORDER BY name ASC;
# 需求:查询月薪超过4000的员工,并统计所有月薪超过4000的员工数量
# first_name , last_name , salary 和 超过4000的员工数量
select
first_name,
last_name,
salary,
count(id) over()
from
employee
where
salary >4000;
# 需求:创建报表,在purchase表基础上,添加平均价格和采购总金额两列
select
*,
avg(price) over(),
sum(price) over()
from
purchase;
# 需求:查询部门id为1,2,3三个部门员工的姓名,薪水,和这三个部门员工的平均薪资
select
first_name,
last_name,
salary,
avg(salary) over()
from
employee
where
department_id in(1,2,3);
#需求:统计每一次采购的基本信息包括
# id ,name采购部门名称 item采购的物品明细 price采购价格 minimum price最低采购价格
# 当前价格 price 与最低采购价格 minimum price 的差价
select
d.id,
d.name,
p.item,
p.price,
min(p.price) over () as min_price,
p.price - min(p.price) over () as diff
from department d
join purchase p on d.id = p.department_id;
2.窗口函数over(partition by)用法
PARTITION BY 的作用与 GROUP BY 类似:将数据按照传入的列进行分组,与 GROUP BY 的区别是, PARTITION BY 不会改变结果的行数 partition by 与 group by 的区别: ① partition by 是分析函数,group by是分组函数 ② 在执行顺序上:而partition by应用在以上关键字之后,可以简单理解为就是在执行完select之后,在所得结果集之上进行partition by分组 ③ partition by相比较于group by,能够在保留全部数据的基础上,只对其 中某些字段做分组排序(类似excel中的操作),而group by则只保留参与分组的字段和聚合函数的结果(类似excel中的pivot透视表)# 需求:查询每种类型火车的ID,型号,一等座数量,同型号一等座数量总量
select id,
model,
first_class_places,
sum(first_class_places) over (partition by model)
from train;
# 用group by 实现
select id,
train.model,
first_class_places,
c.sum
from train
join
(select model, sum(first_class_places) as sum
from train
group by model) as c
on train.model = c.model;
# 需求:查询每天的车次数量
# 查询结果包括:时刻表车次ID,日期,每天的车次数量
select
id,
date,
count(id) over(partition by date) as num
from journey;
# 需求:按车型分组,每组中满足一等座>30,二等座>180的有几条记录
# 查询结果包括如下字段: id , model , first_class_places ,
# second_class_places , count 满足条件的记录数量
select
id,
model,
first_class_places,
second_class_places,
count(id) over(partition by model)
from train
where first_class_places>30 and second_class_places>180;
# 需求:查询每天,每条线路速的最快车速
# 查询结果包括如下字段:线路ID,日期,车型,每天相同线路的最快车速
select
a.route_id,
a.date,
b.model,
b.max_speed,
max(max_speed) over(partition by route_id,date)
from JOURNEY a
join
train b
on a.train_id = b.id;
# 需求:查询时刻表中的车次ID,运营车辆的生产日期
# ( production_year ),同一种车型的车次数量,同一线路的车次数量
select
j.id,
b.production_year,
count(j.id) over(partition by train_id) as tran,
count(j.id) over(partition by route_id)as rout
from journey j
join
train b
on train_id = b.id;
# 需求:查询票价表,返回 id , price , date ,并统计每天的在售车票
# 的平均价格,每天的在售车票种类。(不统计运营车辆id为5的数据)
select
a.id,
a.price,
b.date,
avg(a.price) over(partition by date)as avg_,
count(a.id) over(partition by date)as count_
from
ticket a
join
journey b
on a.journey_id = b.id
where b.id !=5;
# 需求:统计每一个员工的姓名,所在部门,薪水,该部门的最低薪水,该部门的最高薪水
select
a.first_name,
a.last_name,
b.name,
a.salary,
min(salary) over(partition by b.name)as min_p,
max(salary) over(partition by b.name)as max_p
from
employee a
join
department b
on a.department_id = b.id;
# 需求:统计每名员工在所在部门的工资占比
# 返回如下字段姓名,所在部门名称,薪水,薪水占该部门比例
select
a.first_name,
a.last_name,
b.name,
a.salary,
round(salary / sum(salary) over(partition by b.name) * 100,2) as precent
from
employee a
join
department b
on a.department_id = b.id;
3.排序函数rank() dense_rank() row_number()
rank() over()返回的排序1,1,3,3,3,6 dense_rank()返回的排序1,1,2,2,2,3 row_number()返回的排序1,2,3,4,5,6# 上面的SQL对所有数据按编辑评分排序,RANK()函数会返回排序后的排名序号
select
name,
platform,
editor_rating,
RANK() over (Order By editor_rating) as rank_
from game;
# dense_rank()函数
select
name,
genre,
updated,
dense_rank() over (order by updated) as dense_rank
from game;
# 想获取排序之后的序号,也可以通过ROW_NUMBER() 来实现,从名字上就能知道,意思是返回行号
select
name,
platform,
editor_rating,
row_number() over (order by editor_rating) 'row_number'
from game;
# 需求:在列表中查找比较新,且安装包体积较小的游戏( released ,size )
select
name,
plarform,
genre,
size,
released,
rank() over (order by released desc ,size) as rank_num
from game;
# 需求:查询游戏名称,类别,安装包大小的排名序号,结果按发行日期降序排列
select
name,
genre,
rank() over (order by size) as size_num
from game
order by released desc;
# 在游戏销售表中添加日期排序列(按日期从近到远排序),最终结果按打分( editor_rating )排序
select
a.id,
a.game_id,
a.price,
a.date,
b.editor_rating,
row_number() over (order by date desc) 'row_num'
from game_purchase a,game b
where a.game_id = b.id
order by b.editor_rating desc;
# NTILE(x)函数将数据分成x组,并给每组分配一个数字(1,2,3...),例如:
SELECT
name,
genre,
editor_rating,
NTILE(3) OVER (ORDER BY editor_rating DESC) as grou
FROM game;
# 将所有的游戏按照升级日期降序排列分成4组,返回游戏名字,类别,更新日期,和分组序号
select
name,
genre,
updated,
NTILE(4) over (order by updated)
from game;
# 需求:查找打分排名第二的游戏
with ranking as (
select
name,
rank() over (order by editor_rating DESC) as rank_
from game
)
select
name
from ranking
where rank_=2;
# 需求:查询安装包大小最小的游戏,返回游戏名称,类别,安装包大小
with ranking as (
select
name,
genre,
size,
rank() over (order by size) as rank_
from
game
)
select
name,
genre,
size
from
ranking
where rank_ =1;
4.自定义窗口函数rows range
ROWS BETWEEN lower_bound AND upper_bound
UNBOUNDED PRECEDING – 对上限无限制
PRECEDING – 当前行之前的第 n 行 ( n ,填入具体数字如:5
PRECEDING )
CURRENT ROW – 仅当前行
FOLLOWING –当前行之后的第 n 行 ( n ,填入具体数字如:5
FOLLOWING )
UNBOUNDED FOLLOWING – 对下限无限
# 统计到当前行为止的累计下单金额(running_total),以及前后3天下单金额总和(sum_3_before_after)。
select
id,
total_price,
sum(total_price) over(
order by placed
rows unbounded preceding
) as running_total,
sum(total_price) over (
order by placed
rows between 3 preceding and 3 following
) as sum_3_before_after
from single_order;
# 需求:仓库发货时需要手工拣货。 对于order_id = 5的订单,计算未分拣的商品数量总和。 对于该订单中的每种商品,按升序查询起出货明细中
# 的ID,产品ID,产品数量和剩余未拣货商品的数量(包括当前行)
select
id,
product_id,
quantity,
sum(quantity) over (
order by id
rows between current row and unbounded following
) as sum_
from
order_position
where order_id = 5;
# 需求:统计每件商品的上架日期,以及截至值该日期,上架商品种类数量
select
id,
name,
introduced,
count(id) over (
order by introduced
rows between unbounded preceding and current row
) as count_
from product;
# 需求:针对每一笔订单,统计下单日期,订单总价,每5笔订单计算一次平均价格(当前行,前后各两行,按下单日期排序),
# 并计算当前订单价格和每5笔订单平均价格的比率
select
id,
placed,
total_price,
avg(total_price) over(
order by placed
rows between 2 preceding and 2 following
) as avg_,
total_price/avg(total_price) over(
order by placed
rows between 2 preceding and 2 following
)
from single_order;
rows用于按行累加,range用于全范围rows和range的区别:
ROWS 和 RANGE 的区别,与 ROW_NUMBER 和 RANK() 的区别类似
和使用 ROWS 一样,使用 RANGE 一样可以通过 BETWEEN …AND… 来自定义窗口
在使用 RANGE 时,我们一般用
RANGE UNBOUNDED PRECEDING
RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
RANGE CURRENT ROW
但是在使用 RANGE 确定窗口大小是,一般不与 n PRECEDING 或 n FOLLOWING 一起使用
使用ROWS,通过当前行计算前n行/后n行,很容易确定窗口大小,使用RANGE,是通过行值来进行判断,如果使用3 PRECEDING 或 3
FOLLOWING 需要对当前行的值进行-3 或者+3操作,具体能选中几行很难确定,通过WINDOW FRAMES 我们希望定义的窗口大小是固定
的、可预期的,但当RANGE 和n PRECEDING 或 n FOLLOWING 具体会选中几行数据,跟随每行取值不同而发生变化,窗口大小很可能不固定
# 需求:统计每个产品的库存变化情况,返回如下字段
# id , product_id , quantity , changed (库存变化发生日期),每件产品变化的总量 sum
select
id,
product_id,
quantity,
changed,
sum(quantity) over(
order by product_id
range current row
)as sum_
from stock_change;
# 需求:统计库存变化情况,返回如下字段
# id , changed (库存变化发生日期),库存变化到当前日期为止的累计次数 count
select
id,
changed,
count(id) over(
order by changed
range unbounded preceding
)as count
from stock_change;
默认的window frames
如果在OVER(…)中没有ORDER BY子句,则所有行视为一个window frames
如果在OVER(…)中指定了ORDER BY子句,则会默认添加 RANGE UNBOUNDED PRECEDING 作为window frame
# 需求:统计id为1的部门,每年的移动平均营收,返回如下字段
# department_id , year , amount移动平均营收 (按年排序,统计当前年份之前的所有年份的收入平均值)
select
department_id,
year,
amount,
avg(amount) over(
order by year
rows unbounded preceding
)as avg
from revenue;
# 需求:统计每年所有部门的平均收入和每个部门当年收入和所有部门平
# 均收入之差,按年排序department_id , year , amount avg 所有部门年平均收入, diff 部门收入与平均收入之差
select
department_id,
year,
amount,
avg(amount) over(
order by department_id
range current row
) as avg,
amount - avg(amount) over(
order by department_id
range current row
) as diff
from revenue;
5.分析函数LEAD,LAG,FIRST_VALUE,LAST_VALUE,NTH_VALUE
lead() 返回上一行列值,计算增量
lag()返回下一行列值,计算变化
first_value()返回第一行值,计算最大、最小
last_value()返回最后一行值,计算最大、最小
nth_value()返回第x行值
# 需求: 统计id 为1的网站,每天访问的人数以及下一天访问的人数
# 返回字段: day 日期, users 访问人数, lead 下一天访问人数
select
day,
users,
lead(users) over (
order by day
) as "下一天人数"
from statistics
where website_id=1;
# lead函数在计算增量的时候非常有用,比如我们想比较同一列两个值的差
select
day,
users,
lead(users) over (
order by day
) as "下一天人数",
users - lead(users) over (
order by day
) as "与下一天之差"
from statistics
where website_id=1;
# 最后一列显示了当日与次日之间的点击次数差异,从业务角度来看,这可以很容易地告诉我们有关该网站的很多信息
# 如果大多数增量是正的,且增量在逐渐变大,那么该网站业务可能处于上升期
# 如果大多数是负的,那么需要找到收入下滑的原因
# 需求:统计id为1的网站,每日收入,后一天收入,以及每日收入的环比
select
day,
revenue,
lead(revenue) over (
order by day
) as lead_,
lead(revenue) over (order by day) - revenue as diff
from statistics
where website_id=1;
# LEAD函数还可以传入两个参数:
# 参数1 跟传入一个参数时的情况一样:一列的列名
# 参数2 代表了偏移量,如果传入2 就说明要以当前行为基准,向前移动两列作为返回值
SELECT
name,
opened,
LEAD(opened,2) OVER(ORDER BY opened)
FROM website;
# lead函数也可以接收三个参数,第三个参数用来传入默认值,应用场景是当使用lead函数返回null的时候,
# 可以用第三个参数传入的默认值进行填充
select
day,
users,
lead(users,7,-1) over (order by day) as '7天后'
from statistics
where
website_id=2 and day between '2016-05-01' and '2016-05-14';
lag(x)函数和LAG(x)函数与LEAD(x)用法类似,区别是,LEAD返回当前行后面的值,LAG返回当前行之前的值
注意: LEAD(…) 和 LAG(…) ,之间可以互相替换,可以在ORDER BY的时候通过 DESC 来改变排序方式,使 LEAD(…) 和 LAG(…)返回相同结果,
LEAD (…) OVER(ORDER BY …)与下面的写法相似
LAG (…) OVER (ORDER BY … DESC)
LEAD (…) OVER(ORDER BY … DESC)与下面的写法相似
LAG (…) OVER (ORDER BY …)
# 需求:统计id为3的网站每天的点击数量,前一天的点击数量
select
day,
clicks,
lead(clicks) over (order by day desc )
from statistics
where website_id=3;
# 结果相同排序不同
select
day,
clicks,
lag(clicks) over (order by day)
from statistics
where website_id=3;
lag()与lead()用法相同
# 每千次展示收入RPM (revenue per thousand impressions) 定义:
# RPM = (revenue / impressions) * 1000
# 收入( revenue ) 除以展示次数( impressions )乘 1000.
# 需求:统计id为2的网站,每天的RPM以及7日后的RPM
select
day,
revenue / impressions * 1000 as RPM,
lead(revenue,7) over (order by day) / lead(impressions,7) over (order by day) * 1000 as RPM_7
from statistics
where website_id=2;
# #转化率定义:转化率= 点击次数 clicks /展示次数 impressions *100
# 需求:统计id = 1的网站,5月15日至5月31日,每天点击次数 clicks ,
# 展示次数 impressions ,转化率( conversion )和前一天的转化率
# ( previous_conversion )
select
day,
clicks,
impressions,
clicks / impressions * 100 as conversion,
lag(clicks) over (order by day) / lag(impressions) over (order by day) * 100 as previous_conversion
from statistics
where website_id=1
and day between '2016-05-15' and '2016-05-31';
# 需求:统计id为2的网站每天用户访问情况,以及最少用户访问人数
select
day,
users,
first_value(users) over (order by day)
from statistics
where website_id=2;
# FIRST_VALUE(x)返回第一个值,LAST_VALUE(x)返回最后一个值
SELECT
name,
opened,
LAST_VALUE(opened) OVER(ORDER BY opened)
FROM website;
# 当 OVER 子句中包含 ORDER BY 时,如果我们不显式定义window frame,SQL会自动带上默认的window frame语句:
# RANGE UNBOUNDED PRECEDING , 意味着我们的查询范围被限定在第一行到当前行( current row )
# 如果想通过LAST_VALUE 与ORDER BY配合得到所有数据排序后的最后一个值,需要吧window frame语句写成
# RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 或者
# ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
SELECT
name,
opened,
LAST_VALUE(opened) OVER(
ORDER BY opened
range between current row and unbounded following
)
FROM website;
# NTH_VALUE(x,n) 函数返回 x列,按指定顺序的第n个值
SELECT
name,
opened,
NTH_VALUE(opened, 2) OVER(
ORDER BY opened
ROWS BETWEEN UNBOUNDED PRECEDING
AND UNBOUNDED FOLLOWING)
FROM website;
# 需求:统计id为2的网站的收入情况,在5月15和5月31日之间,每天的收入,以及这半个月内的第三高的日收入金额
select
revenue,
day,
NTH_VALUE(revenue,3) over (
order by revenue desc
rows between unbounded preceding and unbounded following
)
from statistics
where website_id=2
and day between '2016-05-15' and '2016-05-31';
# 需求:统计id为3的网站每天点击的情况,返回如下字段
# 日期 day ,点击量 clicks ,最高点击量和当天点击量的比例ratio (用整数表示)
select
day,
clicks,
round(clicks/first_value(clicks) over (order by clicks desc
rows between unbounded preceding and unbounded following
)*100) as ratio
from statistics
where website_id=3;
总结
sql的学习还是要多写,只有写出来才能知道结果,理论只是指引写的。
光阴似水,人生逆旅矣。