两个数据框合并
# Two example data frames that share the columns `names` and `bmi`.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)
# Full outer join: all = TRUE keeps rows that appear in only one input.
merge(data1, data2, all = TRUE)
# Same result; `by` can be omitted because this value (the columns shared by
# both data frames) is what merge() uses by default.
merge(data1, data2, all = TRUE, by = intersect(names(data1), names(data2)))
| names | bmi | logFC | gender |
|-------|-----|-------|--------|
| AAA | 正常 | 3 | 1 |
| BBB | 低体重 | NA | 2 |
| CCC | 正常 | -0.4 | 1 |
| DDD | 超重 | 5 | 2 |
| EEE | 肥胖 | 0.4 | 1 |
| FFF | 正常 | -3 | NA |
dplyr::full_join函数情况
# Full join without `by`: dplyr joins on every column the two inputs share.
dplyr::full_join(x = data1, y = data2)
dplyr::full_join函数情况:如果by只指定names,另一个同名列bmi不参与匹配,结果中会出现带后缀的重复变量 bmi.x 和 bmi.y
# Full join keyed on `names` only.
dplyr::full_join(x = data1, y = data2, by = "names")
> dplyr::full_join(data1,data2)
Joining, by = c("names", "bmi")
names logFC bmi gender
1 AAA 3.0 正常 1
2 CCC -0.4 正常 1
3 DDD 5.0 超重 2
4 EEE 0.4 肥胖 1
5 FFF -3.0 正常 NA
6 BBB NA 低体重 2
| names | logFC | bmi.x | gender | bmi.y |
|-------|-------|-------|--------|-------|
| AAA | 3 | 正常 | 1 | 正常 |
| CCC | -0.4 | 正常 | 1 | 正常 |
| DDD | 5 | 超重 | 2 | 超重 |
| EEE | 0.4 | 肥胖 | 1 | 肥胖 |
| FFF | -3 | 正常 | NA | NA |
| BBB | NA | NA | 2 | 低体重 |
R merge() 与 dplyr join() 的对应关系
| dplyr | base |
|-------|------|
| inner_join(df1, df2) | merge(df1, df2) |
| left_join(df1, df2) | merge(df1, df2, all.x = TRUE) |
| right_join(df1, df2) | merge(df1, df2, all.y = TRUE) |
| full_join(df1, df2) | merge(df1, df2, all = TRUE) |
| semi_join(df1, df2) | df1[df1$x %in% df2$x, , drop = FALSE] |
| anti_join(df1, df2) | df1[!df1$x %in% df2$x, , drop = FALSE] |
# Suppose a shared column holds a different value for the same key:
# here AAA's bmi is "正常1", whereas data1 has "正常".
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常1", "低体重", "正常", "超重", "肥胖")
)
merge(data1, data2, all = TRUE, by = intersect(names(data1), names(data2)))
# The two AAA records no longer match on (names, bmi), so each one ends up
# on its own row in the result.
| names | bmi | logFC | gender |
|-------|-----|-------|--------|
| AAA | 正常 | 3 | NA |
| AAA | 正常1 | NA | 1 |
| BBB | 低体重 | NA | 2 |
| CCC | 正常 | -0.4 | 1 |
| DDD | 超重 | 5 | 2 |
| EEE | 肥胖 | 0.4 | 1 |
| FFF | 正常 | -3 | NA |
多个数据框合并
# Example inputs for merging more than two data frames.
data1 <- data.frame(
  names = c("AAA", "CCC", "DDD", "EEE", "FFF"),
  logFC = c(3, -0.4, 5, 0.4, -3),
  bmi = c("正常", "正常", "超重", "肥胖", "正常")
)
data2 <- data.frame(
  names = c("AAA", "BBB", "CCC", "DDD", "EEE"),
  gender = c(1, 2, 1, 2, 1), # 1 = male, 2 = female
  bmi = c("正常", "低体重", "正常", "超重", "肥胖")
)
data3 <- data.frame(
  names = c("GGG"),
  # NOTE(review): gender is the text label "男" here, while data2 uses the
  # numeric codes 1 = male, 2 = female; confirm the mixed coding is intended.
  gender = c("男"),
  bmi = c("正常")
)
# Names of the objects in the current environment that match "data".
file <- ls(pattern = "data")
# The data frames to merge, in order.
ALL1 <- list(data1, data2, data3)
#' Merge a list of data frames with successive full outer joins
#'
#' Folds `merge(all = TRUE, ...)` over `dat` from left to right, so rows that
#' are present in only some of the inputs are kept.
#'
#' @param dat A list of data frames to merge, in order.
#' @param ... Further arguments forwarded to every `merge()` call (for
#'   example `by`). Do not pass `all`: it is already fixed to `TRUE`, and
#'   supplying it again makes the `merge()` call fail with a duplicate
#'   argument error.
#' @return The merged data frame. A one-element list yields that element
#'   itself; an empty list is returned unchanged.
multimerge <- function(dat = list(), ...) {
  # Nothing to merge: hand the empty input back unchanged.
  if (length(dat) == 0L) {
    return(dat)
  }
  # A single input needs no merging; return the data frame itself rather
  # than the list that wraps it, so the return type matches the general case.
  if (length(dat) == 1L) {
    return(dat[[1L]])
  }
  Reduce(function(merged, nxt) merge(all = TRUE, merged, nxt, ...), dat)
}
# Merge every data frame collected in ALL1.
multimerge(dat = ALL1)