R语言merge函数简介

authors <- data.frame(
    surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
    nationality = c("US", "Australia", "US", "UK", "Australia"),
    deceased = c("yes", rep("no", 4)))
books <- data.frame(
    name = I(c("Tukey", "Venables", "Tierney",
             "Ripley", "Ripley", "McNeil", "R Core")),
    title = c("Exploratory Data Analysis",
              "Modern Applied Statistics ...",
              "LISP-STAT",
              "Spatial Statistics", "Stochastic Simulation",
              "Interactive Data Analysis",
              "An Introduction to R"),
    other.author = c(NA, "Ripley", NA, NA, NA, NA,
                     "Venables & Smith"))

如果要实现类似sql里面的inner join 功能，则用代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name")
如果要实现left join功能则用代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.x = TRUE)
right join功能代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.y = TRUE)
full join（全外连接，即两边的记录都保留）功能代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name", all = TRUE)

关于单变量匹配的总结就是这些，但对于多变量匹配呢，例如下面两个表，需要对k1,k2两个变量都相等的情况下匹配
x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)

匹配代码如下：
merge(x, y, by = c("k1","k2")) # inner join；注意merge默认把NA当作可以相互匹配的键值，所以k1、k2同为NA的行也会被匹配上

> ID<-c(1,2,3,4)
> name<-c("Jim","Tony","Lisa","Tom")
> score<-c(89,22,78,78)
> student1<-data.frame(ID,name)
> student2<-data.frame(ID,score)
> student1
ID name
1 1 Jim
2 2 Tony
3 3 Lisa
4 4 Tom
> student2
ID score
1 1    89
2 2    22
3 3    78
4 4    78
> total_student<-merge(student1,student2,by="ID")
> total_student
ID name score
1 1 Jim    89
2 2 Tony    22
3 3 Lisa    78
4 4 Tom    78
> ID<-c(1,2,3)
> name<-c("Jame","Kevin","Sunny")
> student1<-data.frame(ID,name)
> ID<-c(4,5,6)
> name<-c("Sun","Frame","Eric")
> student2<-data.frame(ID,name)
> student1
ID name
1 1 Jame
2 2 Kevin
3 3 Sunny
> student2
ID name
1 4   Sun
2 5 Frame
3 6 Eric
> total<-rbind(student1,student2)
> total
ID name
1 1 Jame
2 2 Kevin
3 3 Sunny
4 4   Sun
5 5 Frame
6 6 Eric
> authors <- data.frame(
+     surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
+     nationality = c("US", "Australia", "US", "UK", "Australia"),
+     deceased = c("yes", rep("no", 4)))
> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3 Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no
> books <- data.frame(
+     name = I(c("Tukey", "Venables", "Tierney",
+              "Ripley", "Ripley", "McNeil", "R Core")),
+     title = c("Exploratory Data Analysis",
+               "Modern Applied Statistics ...",
+               "LISP-STAT",
+               "Spatial Statistics", "Stochastic Simulation",
+               "Interactive Data Analysis",
+               "An Introduction to R"),
+     other.author = c(NA, "Ripley", NA, NA, NA, NA,
+                      "Venables & Smith"))
>
> books
      name                         title     other.author
1    Tukey     Exploratory Data Analysis             <NA>
2 Venables Modern Applied Statistics ...           Ripley
3 Tierney                     LISP-STAT             <NA>
4   Ripley            Spatial Statistics             <NA>
5   Ripley         Stochastic Simulation             <NA>
6   McNeil     Interactive Data Analysis             <NA>
7   R Core          An Introduction to R Venables & Smith
> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3 Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no
> m1 <- merge(authors, books, by.x = "surname", by.y = "name")
> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4 Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.x = TRUE)
> m1
   surname nationality deceased                         title other.author
1   McNeil   Australia       no     Interactive Data Analysis         <NA>
2   Ripley          UK       no            Spatial Statistics         <NA>
3   Ripley          UK       no         Stochastic Simulation         <NA>
4 Tierney          US       no                     LISP-STAT         <NA>
5    Tukey          US      yes     Exploratory Data Analysis         <NA>
6 Venables   Australia       no Modern Applied Statistics ...       Ripley
> m1 <- merge(authors, books, by.x = "surname", by.y = "name", all.y = TRUE)
> m1
   surname nationality deceased                         title
1   McNeil   Australia       no     Interactive Data Analysis
2   R Core        <NA>     <NA>          An Introduction to R
3   Ripley          UK       no            Spatial Statistics
4   Ripley          UK       no         Stochastic Simulation
5 Tierney          US       no                     LISP-STAT
6    Tukey          US      yes     Exploratory Data Analysis
7 Venables   Australia       no Modern Applied Statistics ...
      other.author
1             <NA>
2 Venables & Smith
3             <NA>
4             <NA>
5             <NA>
6             <NA>
7           Ripley
> x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
> y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)
> x
k1 k2 data
1 NA 1    1
2 NA NA    2
3 3 NA    3
4 4 4    4
5 5 5    5
> y
k1 k2 data
1 NA NA    1
2 2 NA    2
3 NA 3    3
4 4 4    4
5 5 5    5
> merge(x, y, by = c("k1","k2"))
k1 k2 data.x data.y
1 4 4      4      4
2 5 5      5      5
3 NA NA      2      1
>

秒客网

R语言merge函数简介

相关文章