I have 2 tables: Users and Results. The user table contains duplicate data, which is reflected in the results table. The user below was created 3 times. I need to update the results table so that rows referencing UserId 2 and 3 point to UserId 1 instead, so that all the results can be viewed under this one user.
This is easy if I only have a few users and a few results for them, but in my case I have 500 duplicated users and 30000 results.
我有两个表:用户和结果。usertable包含反映在结果表中的重复数据。下面的用户被创建了3次。我需要更新UserId 2和3到1的结果表,以便所有结果只能在这个用户上查看。如果我只有几个用户和一些结果,这很容易,但是在我的例子中,我有500个重复用户和30000个结果。
I am using SQL Server Express 2014
我正在使用SQL Server Express 2014。
I will really appreciate any help with this!
非常感谢您的帮助!
Edit: mistyped column names in ResultsTable. I'm sorry if you got confused by it.
编辑:在resultTable中输入了错误的列名。如果你感到困惑,我很抱歉。
UserTable
UserId---Fname---LName
1-----Georg-----Smith
2-----Georg-----Smith
3-----Georg-----Smith
ResultsTable
ResultId---UserRefId
1-----1
2-----2
3-----3
4-----1
I have managed to select duplicates from the user table, but I don't know how to proceed further.
我已经设法从用户表中选出了重复项,但是我不知道如何继续下去。
-- List every user row whose (Fname, Lname) pair occurs more than once.
-- Partitioning on the two columns separately, rather than on Fname + Lname,
-- keeps pairs such as ('ab', 'c') and ('a', 'bc') in different groups and
-- stops rows with a NULL name from all collapsing into a single NULL group.
;WITH T AS
(
    SELECT Id,
           Fname,
           Lname,
           COUNT(*) OVER (PARTITION BY Fname, Lname) AS Cnt
    FROM TestDatabase.Users
)
SELECT Id, Fname, Lname
FROM T
WHERE Cnt > 1  -- a name shared by two or more rows is a duplicate
3 个解决方案
#1
0
You are on the right track with the cte. The ROW_NUMBER()
function can be used to flag duplicate UserIds, then you can join the cte into the from clause of your update statement to find the UserIds you want to replace, and join again to find the UserIds you want to replace them with.
你使用CTE的思路是对的。ROW_NUMBER()函数可以用来标记重复的UserId,然后您可以将CTE加入到update语句的from子句中,以找到您想要替换的UserId,并再次连接以找到用来替换它们的UserId。
-- Re-point results that reference a duplicate user at the surviving user,
-- i.e. the lowest UserId sharing the same FName and LName.
-- ResultsTable's reference column is UserRefId (per the table layout in the
-- question); its own key is ResultId and is left untouched.
;WITH cteDedup AS (
    SELECT
        UserId
        ,FName
        ,LName
        -- row_num = 1 marks the row to keep; higher values are its duplicates
        ,ROW_NUMBER() OVER (PARTITION BY FName, LName ORDER BY UserId ASC) AS row_num
    FROM UserTable
)
UPDATE rt
SET UserRefId = original.UserId
FROM ResultsTable AS rt
INNER JOIN cteDedup AS dupe
    ON rt.UserRefId = dupe.UserId
INNER JOIN cteDedup AS original
    ON dupe.FName = original.FName
    AND dupe.LName = original.LName
WHERE dupe.row_num <> 1      -- only results that point at a duplicate
    AND original.row_num = 1 -- replaced by the first user of that name
See the SQLFiddle
参见SQLFiddle
#2
2
Your ResultTable
has 2 columns with the same UserId
name. I changed the second to UserId2
for the query below:
您的ResultTable有两个同名为UserId的列。在下面的查询中,我将第二个列改名为UserId2:
-- Map every user to the lowest UserId that shares its FName/LName, then
-- re-point the results' user reference (UserId2) at that surviving id.
-- The mapping is built from UserTable alone so the surviving id is chosen
-- even when that user has no result rows of its own.
;WITH cte AS
(
    SELECT U.UserId,
           MIN(U.UserId) OVER (PARTITION BY U.FName, U.LName) AS OriginalUserId
    FROM UserTable U
)
UPDATE R
SET UserId2 = cte.OriginalUserId
FROM ResultTable R
-- join on the user reference column (UserId2), not on the result's own id
INNER JOIN cte ON R.UserId2 = cte.UserId
WHERE R.UserId2 <> cte.OriginalUserId  -- skip rows that already point at the survivor
#3
0
A little tricky query looks like this:
一个有点棘手的查询是这样的:
-- Re-point each result row at the lowest id among the users that share the
-- same fname and lname. Names are compared column by column (not as
-- fname+lname) so ('ab', 'c') and ('a', 'bc') are not treated as one person.
-- To preview the effect first, replace the "update r / set ..." lines with a
-- select of dupe.id, keep.id and r.userid.
;with t as (
    select id,
           fname,
           lname,
           row_number() over (partition by fname, lname order by id) as rn
    from #users
)
update r
set userid = keep.id
from #results as r
inner join t as dupe
    on dupe.id = r.userid
inner join t as keep
    on keep.fname = dupe.fname
    and keep.lname = dupe.lname
    and keep.rn = 1
where dupe.rn > 1  -- rows already pointing at the kept user need no change
And then you can delete duplicate users
然后你可以删除重复的用户。
-- Remove every user row except the lowest id for each (fname, lname) pair.
-- Run this only after the result rows have been re-pointed at the surviving
-- ids, otherwise they would reference users that no longer exist.
;with t as (
    select id,
           -- #users has fname/lname columns; "name" was only an alias in the
           -- update query, so partition on the real columns here
           row_number() over (partition by fname, lname order by id) as rn
    from #users
)
delete from t
where rn > 1
#1
0
You are on the right track with the cte. The ROW_NUMBER()
function can be used to flag duplicate UserIds, then you can join the cte into the from clause of your update statement to find the UserIds you want to replace, and join again to find the UserIds you want to replace them with.
你使用CTE的思路是对的。ROW_NUMBER()函数可以用来标记重复的UserId,然后您可以将CTE加入到update语句的from子句中,以找到您想要替换的UserId,并再次连接以找到用来替换它们的UserId。
-- Re-point results that reference a duplicate user at the surviving user,
-- i.e. the lowest UserId sharing the same FName and LName.
-- ResultsTable's reference column is UserRefId (per the table layout in the
-- question); its own key is ResultId and is left untouched.
;WITH cteDedup AS (
    SELECT
        UserId
        ,FName
        ,LName
        -- row_num = 1 marks the row to keep; higher values are its duplicates
        ,ROW_NUMBER() OVER (PARTITION BY FName, LName ORDER BY UserId ASC) AS row_num
    FROM UserTable
)
UPDATE rt
SET UserRefId = original.UserId
FROM ResultsTable AS rt
INNER JOIN cteDedup AS dupe
    ON rt.UserRefId = dupe.UserId
INNER JOIN cteDedup AS original
    ON dupe.FName = original.FName
    AND dupe.LName = original.LName
WHERE dupe.row_num <> 1      -- only results that point at a duplicate
    AND original.row_num = 1 -- replaced by the first user of that name
See the SQLFiddle
参见SQLFiddle
#2
2
Your ResultTable
has 2 columns with the same UserId
name. I changed the second to UserId2
for the query below:
您的ResultTable有两个同名为UserId的列。在下面的查询中,我将第二个列改名为UserId2:
-- Map every user to the lowest UserId that shares its FName/LName, then
-- re-point the results' user reference (UserId2) at that surviving id.
-- The mapping is built from UserTable alone so the surviving id is chosen
-- even when that user has no result rows of its own.
;WITH cte AS
(
    SELECT U.UserId,
           MIN(U.UserId) OVER (PARTITION BY U.FName, U.LName) AS OriginalUserId
    FROM UserTable U
)
UPDATE R
SET UserId2 = cte.OriginalUserId
FROM ResultTable R
-- join on the user reference column (UserId2), not on the result's own id
INNER JOIN cte ON R.UserId2 = cte.UserId
WHERE R.UserId2 <> cte.OriginalUserId  -- skip rows that already point at the survivor
#3
0
A little tricky query looks like this:
一个有点棘手的查询是这样的:
-- Re-point each result row at the lowest id among the users that share the
-- same fname and lname. Names are compared column by column (not as
-- fname+lname) so ('ab', 'c') and ('a', 'bc') are not treated as one person.
-- To preview the effect first, replace the "update r / set ..." lines with a
-- select of dupe.id, keep.id and r.userid.
;with t as (
    select id,
           fname,
           lname,
           row_number() over (partition by fname, lname order by id) as rn
    from #users
)
update r
set userid = keep.id
from #results as r
inner join t as dupe
    on dupe.id = r.userid
inner join t as keep
    on keep.fname = dupe.fname
    and keep.lname = dupe.lname
    and keep.rn = 1
where dupe.rn > 1  -- rows already pointing at the kept user need no change
And then you can delete duplicate users
然后你可以删除重复的用户。
-- Remove every user row except the lowest id for each (fname, lname) pair.
-- Run this only after the result rows have been re-pointed at the surviving
-- ids, otherwise they would reference users that no longer exist.
;with t as (
    select id,
           -- #users has fname/lname columns; "name" was only an alias in the
           -- update query, so partition on the real columns here
           row_number() over (partition by fname, lname order by id) as rn
    from #users
)
delete from t
where rn > 1