# Example tables used to demonstrate merge(): `authors` has one row per
# author, `books` has one row per book keyed by an author's surname.
authors <- data.frame(
  # I() stores the names as-is so data.frame() never converts them to a factor.
  surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", 4))
)

# "Ripley" appears twice and "R Core" has no row in `authors`, so the two
# tables exercise both one-to-many and unmatched-key cases when joined.
books <- data.frame(
  name = I(c(
    "Tukey", "Venables", "Tierney",
    "Ripley", "Ripley", "McNeil", "R Core"
  )),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics", "Stochastic Simulation",
    "Interactive Data Analysis",
    "An Introduction to R"
  ),
  # Co-author(s) of the book, NA when none is recorded.
  other.author = c(
    NA, "Ripley", NA, NA, NA, NA,
    "Venables & Smith"
  )
)
如果要实现类似 SQL 里面的 inner join 功能,则用代码:
# Inner join: keep only rows where authors$surname equals books$name.
m1 <- merge(authors, books, by.x = "surname", by.y = "name")
如果要实现left join功能则用代码
# Left join: all.x = TRUE keeps every row of `authors`, matched or not.
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.x = TRUE)
right join功能代码
# Right join: all.y = TRUE keeps every row of `books`, matched or not.
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.y = TRUE)
full outer join(全连接)功能代码
# Full outer join: all = TRUE keeps unmatched rows from both tables.
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all = TRUE)
关于单变量匹配的总结就是这些,但对于多变量匹配呢,例如下面两个表,需要对k1,k2两个变量都相等的情况下匹配
# Two tables sharing the key columns k1 and k2; both contain NA keys, and
# both carry a non-key column named `data`.
x <- data.frame(k1 = c(NA, NA, 3, 4, 5), k2 = c(1, NA, NA, 4, 5), data = 1:5)
y <- data.frame(k1 = c(NA, 2, NA, 4, 5), k2 = c(NA, NA, 3, 4, 5), data = 1:5)
匹配代码如下(注意:merge 默认把 NA 当作可以互相匹配的值,所以 k1、k2 同为 NA 的行也会被匹配上):
merge(x, y, by = c("k1","k2")) #inner join
> ID<-c(1,2,3,4)
> name<-c("Jim","Tony","Lisa","Tom")
> score<-c(89,22,78,78)
> student1<-data.frame(ID,name)
> student2<-data.frame(ID,score)
> student1
ID name
1 1 Jim
2 2 Tony
3 3 Lisa
4 4 Tom
> student2
ID score
1 1 89
2 2 22
3 3 78
4 4 78
> total_student<-merge(student1,student2,by="ID")
> total_student
ID name score
1 1 Jim 89
2 2 Tony 22
3 3 Lisa 78
4 4 Tom 78
> ID<-c(1,2,3)
> name<-c("Jame","Kevin","Sunny")
> student1<-data.frame(ID,name)
> ID<-c(4,5,6)
> name<-c("Sun","Frame","Eric")
> student2<-data.frame(ID,name)
> student1
ID name
1 1 Jame
2 2 Kevin
3 3 Sunny
> student2
ID name
1 4 Sun
2 5 Frame
3 6 Eric
> total<-rbind(student1,student2)
> total
ID name
1 1 Jame
2 2 Kevin
3 3 Sunny
4 4 Sun
5 5 Frame
6 6 Eric
> authors <- data.frame(
+ surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
+ nationality = c("US", "Australia", "US", "UK", "Australia"),
+ deceased = c("yes", rep("no", 4)))
> authors
surname nationality deceased
1 Tukey US yes
2 Venables Australia no
3 Tierney US no
4 Ripley UK no
5 McNeil Australia no
> books <- data.frame(
+ name = I(c("Tukey", "Venables", "Tierney",
+ "Ripley", "Ripley", "McNeil", "R Core")),
+ title = c("Exploratory Data Analysis",
+ "Modern Applied Statistics ...",
+ "LISP-STAT",
+ "Spatial Statistics", "Stochastic Simulation",
+ "Interactive Data Analysis",
+ "An Introduction to R"),
+ other.author = c(NA, "Ripley", NA, NA, NA, NA,
+ "Venables & Smith"))
>
> books
name title other.author
1 Tukey Exploratory Data Analysis <NA>
2 Venables Modern Applied Statistics ... Ripley
3 Tierney LISP-STAT <NA>
4 Ripley Spatial Statistics <NA>
5 Ripley Stochastic Simulation <NA>
6 McNeil Interactive Data Analysis <NA>
7 R Core An Introduction to R Venables & Smith
> authors
surname nationality deceased
1 Tukey US yes
2 Venables Australia no
3 Tierney US no
4 Ripley UK no
5 McNeil Australia no
> m1 <- merge(authors, books, by.x = "surname", by.y = "name")
> m1
surname nationality deceased title other.author
1 McNeil Australia no Interactive Data Analysis <NA>
2 Ripley UK no Spatial Statistics <NA>
3 Ripley UK no Stochastic Simulation <NA>
4 Tierney US no LISP-STAT <NA>
5 Tukey US yes Exploratory Data Analysis <NA>
6 Venables Australia no Modern Applied Statistics ... Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.x=TRUE)
> m1
surname nationality deceased title other.author
1 McNeil Australia no Interactive Data Analysis <NA>
2 Ripley UK no Spatial Statistics <NA>
3 Ripley UK no Stochastic Simulation <NA>
4 Tierney US no LISP-STAT <NA>
5 Tukey US yes Exploratory Data Analysis <NA>
6 Venables Australia no Modern Applied Statistics ... Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name",all.y=TRUE)
> m1
surname nationality deceased title
1 McNeil Australia no Interactive Data Analysis
2 R Core <NA> <NA> An Introduction to R
3 Ripley UK no Spatial Statistics
4 Ripley UK no Stochastic Simulation
5 Tierney US no LISP-STAT
6 Tukey US yes Exploratory Data Analysis
7 Venables Australia no Modern Applied Statistics ...
other.author
1 <NA>
2 Venables & Smith
3 <NA>
4 <NA>
5 <NA>
6 <NA>
7 Ripley
> x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
> y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)
> x
k1 k2 data
1 NA 1 1
2 NA NA 2
3 3 NA 3
4 4 4 4
5 5 5 5
> y
k1 k2 data
1 NA NA 1
2 2 NA 2
3 NA 3 3
4 4 4 4
5 5 5 5
> merge(x, y, by = c("k1","k2"))
k1 k2 data.x data.y
1 4 4 4 4
2 5 5 5 5
3 NA NA 2 1
>