SQL基于前面的行填充空白数据

时间:2021-04-04 23:06:27

I have a SQL Table that is similar to the following:

我有一个类似于以下的SQL表:

ID, Date,   D1, D2, D3
1   1/1/13  0   X   A
2   1/2/13          
3   1/3/13  1
4   1/4/13          B
5   1/5/13  

that I need to update to the following:

我需要更新以下内容:

ID, Date,   D1, D2, D3
1   1/1/13  0   X   A
2   1/2/13  0   X   A       
3   1/3/13  1   X   A   
4   1/4/13  1   X   B
5   1/5/13  1   X   B

Basically filling in all the nulls with the value before it. This is only going to be performed once. Would a loop be the best option? Or is there something that would be more effective?

基本上就是用前面的值填充所有的空值。这只会执行一次。循环是最好的选择吗?还是有更有效的方法?

2 个解决方案

#1


5  

Presuming that you're using SQL-Server you can use this common-table-expression:

假设您正在使用SQL-Server,您可以使用这个通用表表达式:

WITH cte 
     AS (SELECT T1.id, 
                T1.date, 
                d1 = COALESCE(T1.d1, (SELECT TOP 1 d1 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id  
                                             AND d1 IS NOT NULL 
                                      ORDER  BY id DESC)), 
                d2 = COALESCE(T1.d2, (SELECT TOP 1 d2 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id 
                                             AND d2 IS NOT NULL 
                                      ORDER  BY id DESC)), 
                d3 = COALESCE(T1.d3, (SELECT TOP 1 d3 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id
                                             AND d3 IS NOT NULL 
                                      ORDER  BY id DESC)) 
         FROM   dbo.tablename T1) 
UPDATE T 
SET    T.d1 = C.d1, 
       T.d2 = C.d2, 
       T.d3 = C.d3 
FROM   dbo.tablename T 
       INNER JOIN cte C 
               ON T.id = C.id 
WHERE  T.d1 IS NULL 
        OR T.d2 IS NULL 
        OR T.d3 IS NULL 

DEMO

演示

Edit Since you have mentioned in the comments that you have '' instead of null, here is a modified version that supports both:

由于您在评论中提到“这里有一个修改后的版本,它同时支持以下两种功能:

WITH cte AS 
        (SELECT T1.id, 
                T1.date, 
                d1 = CASE WHEN COALESCE(T1.d1, '') <> '' THEN d1 
                       ELSE(SELECT TOP 1 d1 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d1, '') <> '' 
                            ORDER  BY T2.id DESC) END, 
                d2 = CASE WHEN COALESCE(T1.d2, '') <> '' THEN d2 
                       ELSE(SELECT TOP 1 d2 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d2, '') <> '' 
                            ORDER  BY T2.id DESC) END, 
                d3 = CASE WHEN COALESCE(T1.d3, '') <> '' THEN d3 
                       ELSE(SELECT TOP 1 d3 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d3, '') <> '' 
                            ORDER  BY T2.id DESC) END 
         FROM   dbo.tablename T1) 
UPDATE T 
SET    T.d1 = C.d1, 
       T.d2 = C.d2, 
       T.d3 = C.d3 
FROM   dbo.tablename T 
       INNER JOIN cte C 
               ON T.id = C.id 
WHERE  COALESCE(T.d1, '') = '' 
        OR COALESCE(T.d2, '') = '' 
        OR COALESCE(T.d3, '') = '' 

DEMO

演示

#2


1  

Assuming we're using the ID column to determine order, you can do it without loops.

假设我们使用ID列来确定顺序,那么您可以不使用循环。

Whether you want to do this is another matter - it doesn't look pretty:

你是否想做这件事是另一回事——它看起来并不漂亮:

declare @t table (ID int, Date date,   D1 int, D2 char(1), D3 char(1))
insert into @t(ID, Date,   D1, D2, D3) values
(1,'20130101',0,'X','A'),
(2,'20130201',null,null,null),     
(3,'20130301',1,null,null),
(4,'20130401',null,null,'B'),
(5,'20130501',null,null,null)

update a
set
    a.D1 = COALESCE(a.D1,d1.D1),
    a.D2 = COALESCE(a.D2,d2.D2),
    a.D3 = COALESCE(a.D3,d3.D3)
from
    @t a
        left join
    @t D1
        on
            D1.ID < a.ID and
            D1.D1 IS NOT NULL
        left join
    @t D1_anti
        on
            D1_anti.ID < a.ID and
            D1_anti.D1 is not null and
            D1_anti.ID > D1.ID
        left join
    @t D2
        on
            D2.ID < a.ID and
            D2.D2 IS NOT NULL
        left join
    @t D2_anti
        on
            D2_anti.ID < a.ID and
            D2_anti.D2 is not null and
            D2_anti.ID > D2.ID
        left join
    @t D3
        on
            D3.ID < a.ID and
            D3.D3 IS NOT NULL
        left join
    @t D3_anti
        on
            D3_anti.ID < a.ID and
            D3_anti.D3 is not null and
            D3_anti.ID > D3.ID
where
    D1_anti.ID is null and
    D2_anti.ID is null and
    D3_anti.ID is null

select * from @t

Basically, we perform joins to try to find earlier rows which may be applicable, and then perform the _anti joins to make sure that each found earlier row is the latest such row that exists.

基本上,我们执行连接来尝试查找可能适用的早期行,然后执行_anti连接,以确保前面找到的每一行都是当前存在的最新的此类行。

#1


5  

Presuming that you're using SQL-Server you can use this common-table-expression:

假设您正在使用SQL-Server,您可以使用这个通用表表达式:

WITH cte 
     AS (SELECT T1.id, 
                T1.date, 
                d1 = COALESCE(T1.d1, (SELECT TOP 1 d1 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id  
                                             AND d1 IS NOT NULL 
                                      ORDER  BY id DESC)), 
                d2 = COALESCE(T1.d2, (SELECT TOP 1 d2 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id 
                                             AND d2 IS NOT NULL 
                                      ORDER  BY id DESC)), 
                d3 = COALESCE(T1.d3, (SELECT TOP 1 d3 
                                      FROM   dbo.tablename T2 
                                      WHERE  T2.id < T1.id
                                             AND d3 IS NOT NULL 
                                      ORDER  BY id DESC)) 
         FROM   dbo.tablename T1) 
UPDATE T 
SET    T.d1 = C.d1, 
       T.d2 = C.d2, 
       T.d3 = C.d3 
FROM   dbo.tablename T 
       INNER JOIN cte C 
               ON T.id = C.id 
WHERE  T.d1 IS NULL 
        OR T.d2 IS NULL 
        OR T.d3 IS NULL 

DEMO

演示

Edit Since you have mentioned in the comments that you have '' instead of null, here is a modified version that supports both:

由于您在评论中提到“这里有一个修改后的版本,它同时支持以下两种功能:

WITH cte AS 
        (SELECT T1.id, 
                T1.date, 
                d1 = CASE WHEN COALESCE(T1.d1, '') <> '' THEN d1 
                       ELSE(SELECT TOP 1 d1 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d1, '') <> '' 
                            ORDER  BY T2.id DESC) END, 
                d2 = CASE WHEN COALESCE(T1.d2, '') <> '' THEN d2 
                       ELSE(SELECT TOP 1 d2 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d2, '') <> '' 
                            ORDER  BY T2.id DESC) END, 
                d3 = CASE WHEN COALESCE(T1.d3, '') <> '' THEN d3 
                       ELSE(SELECT TOP 1 d3 
                            FROM   dbo.tablename T2 
                            WHERE  T2.id < T1.id 
                                   AND COALESCE(T2.d3, '') <> '' 
                            ORDER  BY T2.id DESC) END 
         FROM   dbo.tablename T1) 
UPDATE T 
SET    T.d1 = C.d1, 
       T.d2 = C.d2, 
       T.d3 = C.d3 
FROM   dbo.tablename T 
       INNER JOIN cte C 
               ON T.id = C.id 
WHERE  COALESCE(T.d1, '') = '' 
        OR COALESCE(T.d2, '') = '' 
        OR COALESCE(T.d3, '') = '' 

DEMO

演示

#2


1  

Assuming we're using the ID column to determine order, you can do it without loops.

假设我们使用ID列来确定顺序,那么您可以不使用循环。

Whether you want to do this is another matter - it doesn't look pretty:

你是否想做这件事是另一回事——它看起来并不漂亮:

declare @t table (ID int, Date date,   D1 int, D2 char(1), D3 char(1))
insert into @t(ID, Date,   D1, D2, D3) values
(1,'20130101',0,'X','A'),
(2,'20130201',null,null,null),     
(3,'20130301',1,null,null),
(4,'20130401',null,null,'B'),
(5,'20130501',null,null,null)

update a
set
    a.D1 = COALESCE(a.D1,d1.D1),
    a.D2 = COALESCE(a.D2,d2.D2),
    a.D3 = COALESCE(a.D3,d3.D3)
from
    @t a
        left join
    @t D1
        on
            D1.ID < a.ID and
            D1.D1 IS NOT NULL
        left join
    @t D1_anti
        on
            D1_anti.ID < a.ID and
            D1_anti.D1 is not null and
            D1_anti.ID > D1.ID
        left join
    @t D2
        on
            D2.ID < a.ID and
            D2.D2 IS NOT NULL
        left join
    @t D2_anti
        on
            D2_anti.ID < a.ID and
            D2_anti.D2 is not null and
            D2_anti.ID > D2.ID
        left join
    @t D3
        on
            D3.ID < a.ID and
            D3.D3 IS NOT NULL
        left join
    @t D3_anti
        on
            D3_anti.ID < a.ID and
            D3_anti.D3 is not null and
            D3_anti.ID > D3.ID
where
    D1_anti.ID is null and
    D2_anti.ID is null and
    D3_anti.ID is null

select * from @t

Basically, we perform joins to try to find earlier rows which may be applicable, and then perform the _anti joins to make sure that each found earlier row is the latest such row that exists.

基本上,我们执行连接来尝试查找可能适用的早期行,然后执行_anti连接,以确保前面找到的每一行都是当前存在的最新的此类行。