[mysql] 随机查询 效率比较

时间:2024-10-27 21:03:56
select
primary_count as primaryCount,
primary_score as primaryScore,
junior_count as juniorCount,
junior_score as juniorScore,
senior_count as seniorCount,
senoir_score as senoirScore,
total_score as totalScore,
pass_score as passScore
from pd_paper p
where p.is_valid = ''
order by RAND() limit 1 分析:
ORDER BY从句里面不能使用RAND()函数,因为这样会导致数据列被多次扫描。
测试发现这样效率非常低。一个15万余条的库,查询5条数据,要8秒以上。 You cannot use a column with RAND() values in an ORDER BY clause, because ORDER BY
would evaluate the column multiple times. 更高效的做法:查询max(id) * rand()来随机获取数据。 SELECT *
FROM `table` AS t1
JOIN (
SELECT ROUND(RAND()
* (SELECT MAX(id) FROM `table`)) AS id
) AS t2
WHERE t1.id >= t2.id
ORDER BY t1.id ASC LIMIT 5; 但是这样会产生连续的5条记录。解决办法只能是每次查询一条,查询5次。
即便如此也值得,因为15万条的表,查询只需要0.01秒不到。
采用join 语法 可以实现真正的随机。 SELECT *
FROM `table`
WHERE id >= (SELECT FLOOR( MAX(id) * RAND()) FROM `table` )
ORDER BY id LIMIT 1; 把语句完善一下,加上MIN(id)的判断。
如果没有加上MIN(id)的判断,结果有一半的时间总是查询到表中的前面几行。
完整的语句: ① where 子句 SELECT * FROM `table`
WHERE id >= (SELECT floor( RAND() * ((SELECT MAX(id) FROM `table`)-
(SELECT MIN(id) FROM `table`)) + (SELECT MIN(id) FROM `table`)))
ORDER BY id LIMIT 1; ② join SELECT *
FROM `table` AS t1
JOIN (
SELECT ROUND(RAND()
* ((SELECT MAX(id) FROM `table`)-(SELECT MIN(id) FROM `table`))
+(SELECT MIN(id) FROM `table`)) AS id) AS t2
WHERE t1.id >= t2.id
ORDER BY t1.id LIMIT 1; #随机查询一套考卷定义
SELECT
p.primary_count as primaryCount,
p.primary_score as primaryScore,
p.junior_count as juniorCount,
p.junior_score as juniorScore,
p.senior_count as seniorCount,
p.senoir_score as senoirScore,
p.total_score as totalScore,
p.pass_score as passScore FROM
pd_paper AS p
JOIN
(
SELECT ROUND(
RAND()
*((SELECT MAX(id) FROM pd_paper)-(SELECT MIN(id) FROM pd_paper))
+(SELECT MIN(id) FROM pd_paper)
) AS id
)
AS p2
WHERE p.id >= p2.id
ORDER BY p.id LIMIT 1; 最后在程序对这两个语句进行分别查询10次,
前者花费时间 0.147433 秒
后者花费时间 0.015130 秒
看来采用JOIN的语法比直接在WHERE中使用函数效率还要高很多。 附加一个复杂sql:按题目类型(三种)随机查询全部考题信息 SELECT * from (
select
p.id as id,
p.title as title,
p.question as question,
p.answer as answer,
p.crt_time as crtTime
from
pd_problem p
join
(SELECT ROUND(RAND()
* ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))
+(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2
where p.id >= p2.pid
and p.is_valid = ''
and p.paper_type = ''
and p.paper_class = ''
order by p.id limit 5
) as t1
union all
SELECT * from (
select
p.id as id,
p.title as title,
p.question as question,
p.answer as answer,
p.crt_time as crtTime
from
pd_problem p
join
(SELECT ROUND(RAND()
* ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))
+(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2
where p.id >= p2.pid
and p.is_valid = ''
and p.paper_type = ''
and p.paper_class = ''
order by p.id limit 5
) as t2
union all
SELECT * from (
select
p.id as id,
p.title as title,
p.question as question,
p.answer as answer,
p.crt_time as crtTime
from
pd_problem p
join
(SELECT ROUND(RAND()
* ((SELECT MAX(pp.id) FROM pd_problem pp)-(SELECT MIN(pp.id) FROM pd_problem pp))
+(SELECT MIN(pp.id) FROM pd_problem pp)) AS pid) AS p2
where p.id >= p2.pid
and p.is_valid = ''
and p.paper_type = ''
and p.paper_class = ''
order by p.id limit 5
) as t3