I need to take the first N rows for each group, ordered by custom column.
我需要为每组取前N行,按自定义列排序。
Given the following table:
得到下表:
db=# SELECT * FROM xxx;
id | section_id | name
----+------------+------
1 | 1 | A
2 | 1 | B
3 | 1 | C
4 | 1 | D
5 | 2 | E
6 | 2 | F
7 | 3 | G
8 | 2 | H
(8 rows)
I need the first 2 rows (ordered by name) for each section_id, i.e. a result similar to:
我需要每个section_id的前两行(按名称排序),即类似于:
id | section_id | name
----+------------+------
1 | 1 | A
2 | 1 | B
5 | 2 | E
6 | 2 | F
7 | 3 | G
(5 rows)
I am using PostgreSQL 8.3.5.
我使用的是PostgreSQL 8.3.5。
5 个解决方案
#1
190
New solution (PostgreSQL 8.4)
新的解决方案(PostgreSQL 8.4)
SELECT
*
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY section_id ORDER BY name) AS r,
t.*
FROM
xxx t) x
WHERE
x.r <= 2;
#2
11
Here's another solution (PostgreSQL <= 8.3).
这里是另一个解决方案(PostgreSQL <= 8.3)。
SELECT
*
FROM
xxx a
WHERE (
SELECT
COUNT(*)
FROM
xxx
WHERE
section_id = a.section_id
AND
name <= a.name
) <= 2
#3
11
Since v9.3 you can do a lateral join
因为v9.3你可以做横向连接
select distinct t_outer.section_id, t_top.id, t_top.name from t t_outer
join lateral (
select * from t t_inner
where t_inner.section_id = t_outer.section_id
order by t_inner.name
limit 2
) t_top on true
order by t_outer.section_id;
It might be faster but, of course, you should test performance specifically on your data and use case.
它可能更快,但是,当然,您应该在数据和用例上测试性能。
#4
2
SELECT x.*
FROM (
SELECT section_id,
COALESCE
(
(
SELECT xi
FROM xxx xi
WHERE xi.section_id = xo.section_id
ORDER BY
name, id
OFFSET 1 LIMIT 1
),
(
SELECT xi
FROM xxx xi
WHERE xi.section_id = xo.section_id
ORDER BY
name DESC, id DESC
LIMIT 1
)
) AS mlast
FROM (
SELECT DISTINCT section_id
FROM xxx
) xo
) xoo
JOIN xxx x
ON x.section_id = xoo.section_id
AND (x.name, x.id) <= ((mlast).name, (mlast).id)
#5
2
-- ranking without WINDOW functions
-- EXPLAIN ANALYZE
WITH rnk AS (
SELECT x1.id
, COUNT(x2.id) AS rnk
FROM xxx x1
LEFT JOIN xxx x2 ON x1.section_id = x2.section_id AND x2.name <= x1.name
GROUP BY x1.id
)
SELECT this.*
FROM xxx this
JOIN rnk ON rnk.id = this.id
WHERE rnk.rnk <=2
ORDER BY this.section_id, rnk.rnk
;
-- The same without using a CTE
-- EXPLAIN ANALYZE
SELECT this.*
FROM xxx this
JOIN ( SELECT x1.id
, COUNT(x2.id) AS rnk
FROM xxx x1
LEFT JOIN xxx x2 ON x1.section_id = x2.section_id AND x2.name <= x1.name
GROUP BY x1.id
) rnk
ON rnk.id = this.id
WHERE rnk.rnk <=2
ORDER BY this.section_id, rnk.rnk
;
#1
190
New solution (PostgreSQL 8.4)
新的解决方案(PostgreSQL 8.4)
SELECT
*
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY section_id ORDER BY name) AS r,
t.*
FROM
xxx t) x
WHERE
x.r <= 2;
#2
11
Here's another solution (PostgreSQL <= 8.3).
这里是另一个解决方案(PostgreSQL <= 8.3)。
SELECT
*
FROM
xxx a
WHERE (
SELECT
COUNT(*)
FROM
xxx
WHERE
section_id = a.section_id
AND
name <= a.name
) <= 2
#3
11
Since v9.3 you can do a lateral join
因为v9.3你可以做横向连接
select distinct t_outer.section_id, t_top.id, t_top.name from t t_outer
join lateral (
select * from t t_inner
where t_inner.section_id = t_outer.section_id
order by t_inner.name
limit 2
) t_top on true
order by t_outer.section_id;
It might be faster but, of course, you should test performance specifically on your data and use case.
它可能更快,但是,当然,您应该在数据和用例上测试性能。
#4
2
SELECT x.*
FROM (
SELECT section_id,
COALESCE
(
(
SELECT xi
FROM xxx xi
WHERE xi.section_id = xo.section_id
ORDER BY
name, id
OFFSET 1 LIMIT 1
),
(
SELECT xi
FROM xxx xi
WHERE xi.section_id = xo.section_id
ORDER BY
name DESC, id DESC
LIMIT 1
)
) AS mlast
FROM (
SELECT DISTINCT section_id
FROM xxx
) xo
) xoo
JOIN xxx x
ON x.section_id = xoo.section_id
AND (x.name, x.id) <= ((mlast).name, (mlast).id)
#5
2
-- ranking without WINDOW functions
-- EXPLAIN ANALYZE
WITH rnk AS (
SELECT x1.id
, COUNT(x2.id) AS rnk
FROM xxx x1
LEFT JOIN xxx x2 ON x1.section_id = x2.section_id AND x2.name <= x1.name
GROUP BY x1.id
)
SELECT this.*
FROM xxx this
JOIN rnk ON rnk.id = this.id
WHERE rnk.rnk <=2
ORDER BY this.section_id, rnk.rnk
;
-- The same without using a CTE
-- EXPLAIN ANALYZE
SELECT this.*
FROM xxx this
JOIN ( SELECT x1.id
, COUNT(x2.id) AS rnk
FROM xxx x1
LEFT JOIN xxx x2 ON x1.section_id = x2.section_id AND x2.name <= x1.name
GROUP BY x1.id
) rnk
ON rnk.id = this.id
WHERE rnk.rnk <=2
ORDER BY this.section_id, rnk.rnk
;