【R语言】两个/N个数据合并merge函数

时间:2025-02-22 07:04:39

两个数据框合并

# Example tables: `names` is the sample key; `bmi` is present in both tables.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)

# Full outer join: all = TRUE keeps rows that occur in only one table.
merge(data1, data2, all = TRUE)
# Same result: `by` can be omitted because it defaults to the shared columns.
merge(data1, data2, all = TRUE, by = intersect(names(data1), names(data2)))
names bmi logFC gender
AAA 正常 3 1
BBB 低体重 NA 2
CCC 正常 -0.4 1
DDD 超重 5 2
EEE 肥胖 0.4 1
FFF 正常 -3 NA

dplyr::full_join函数情况

# No `by` given: full_join() matches on every shared column (names and bmi).
dplyr::full_join(data1,data2)

dplyr::full_join函数情况:如果指定 by = "names"(只按 names 匹配),两个数据框中同名但未参与匹配的变量 bmi 会被分别加上后缀 .x 和 .y(即 bmi.x、bmi.y)

# Match on names only; bmi is then kept from both inputs as bmi.x / bmi.y.
dplyr::full_join(data1,data2,by=c("names"))

> dplyr::full_join(data1,data2)      
Joining, by = c("names", "bmi")
  names logFC    bmi gender
1   AAA   3.0   正常      1
2   CCC  -0.4   正常      1
3   DDD   5.0   超重      2
4   EEE   0.4   肥胖      1
5   FFF  -3.0   正常     NA
6   BBB    NA 低体重      2
> dplyr::full_join(data1,data2,by=c("names"))
names logFC bmi.x gender bmi.y
AAA 3 正常 1 正常
CCC -0.4 正常 1 正常
DDD 5 超重 2 超重
EEE 0.4 肥胖 1 肥胖
FFF -3 正常 NA NA
BBB NA NA 2 低体重

R merge() 与 dplyr join() 的对应关系

dplyr base
inner_join(df1, df2) merge(df1, df2)
left_join(df1, df2) merge(df1, df2, all.x = TRUE)
right_join(df1, df2) merge(df1, df2, all.y = TRUE)
full_join(df1, df2) merge(df1, df2, all = TRUE)
semi_join(df1, df2) df1[df1$x %in% df2$x, , drop = FALSE]
anti_join(df1, df2) df1[!df1$x %in% df2$x, , drop = FALSE]
# Suppose a shared variable holds different content in the two tables,
# e.g. bmi for AAA is "正常1" here instead of "正常".
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常1", "低体重", "正常", "超重", "肥胖")
)

# Match on all shared columns (names, bmi): AAA's bmi differs between the
# tables, so its two rows do not pair up.
merge(data1,data2,all=TRUE,by = intersect(names(data1), names(data2)))
# 结果会另起一行
names bmi logFC gender
AAA 正常 3 NA
AAA 正常1 NA 1
BBB 低体重 NA 2
CCC 正常 -0.4 1
DDD 超重 5 2
EEE 肥胖 0.4 1
FFF 正常 -3 NA

多个数据框合并

# Three example tables sharing the key column `names`.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)

data3 <- data.frame(
  names = c("GGG"),
  # NOTE(review): gender is a text label here, not the 1/2 code used in
  # data2, so the column types differ between the tables.
  gender = c("男"),
  bmi = c("正常")
)

# Names of objects in the current environment that contain "data".
file <- ls(pattern = "data")
# The tables to merge, in merge order.
ALL1 <- list(data1, data2, data3)

# Merge a list of data frames into a single data frame.
#
# The first element is merged with each following element in list order,
# always as a full outer join (all = TRUE).
#
# Args:
#   dat: A list of data frames to merge.
#   ...: Further arguments forwarded to every merge() call (e.g. `by`).
#        `all` is already fixed to TRUE and must not be passed again.
#
# Returns:
#   The merged data frame. A one-element list yields its only element
#   (previously the wrapping list was returned, giving callers a list
#   instead of a data frame). An empty list is returned unchanged.
multimerge <- function(dat = list(), ...) {
  if (length(dat) == 0L) {
    return(dat)
  }
  merged <- dat[[1L]]
  # dat[-1L] is empty for a one-element list, so the loop is skipped and
  # the sole data frame is returned as is.
  for (nxt in dat[-1L]) {
    merged <- merge(merged, nxt, all = TRUE, ...)
  }
  merged
}
# Full outer merge of data1, data2 and data3, in list order.
multimerge(ALL1)