如何使用DF /列表索引将数据帧列表连接到一个数据帧?

时间:2021-10-28 22:56:47
library(dplyr); library(tibble)

Here is my sample data. A list of small dataframes (listOfDFs) I want to join to a single dataframe, (points).

这是我的示例数据:一个由若干小数据帧组成的列表(listOfDFs),我想把它连接到单个数据帧(points)上。

listOfDFs has 5 small dataframes with 7 rows total, and points is one dataframe with 5 rows:

listOfDFs有5个小数据帧,总共7行,points是一个5行的数据帧:

# Event-level table: one row per event (ID, year, country).
# Assembled as a plain named list carrying the tibble class vector and compact
# row names, so the object equals the one the original `dput()` text describes.
points <- structure(
  list(
    EVENT_ID_CNTY = c("LBY1243", "LBY3389", "LBY3393", "LBY3506", "LBY3822"),
    year = c(2013, 2015, 2015, 2015, 2015),
    COUNTRY = rep("Libya", 5L)
  ),
  row.names = .set_row_names(5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Named list ("1".."5") of small cell-level tibbles; element i holds the rows
# that belong to row i of `points`. Elements 1 and 5 share the same two rows,
# and elements 2-4 share the same single row, so each is built only once.
listOfDFs <- local({
  # Wrap a named list of equal-length columns as a tibble-classed data frame.
  as_tbl <- function(cols) {
    structure(
      cols,
      row.names = .set_row_names(length(cols[[1L]])),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  two_cells <- as_tbl(list(
    CELL_ID = c(165267, 164547),
    gwno = c(620L, 620L),
    POP = c(751.0737, 754.5745),
    prio_country = c("Libya", "Libya")
  ))
  one_cell <- as_tbl(list(
    CELL_ID = 172475,
    gwno = 620L,
    POP = 11676,
    prio_country = "Libya"
  ))

  list(
    `1` = two_cells,
    `2` = one_cell,
    `3` = one_cell,
    `4` = one_cell,
    `5` = two_cells
  )
})

These data look like the following:

这些数据如下所示:

points
#> # A tibble: 5 x 3
#>   EVENT_ID_CNTY  year COUNTRY
#>   <chr>         <dbl> <chr>  
#> 1 LBY1243       2013. Libya  # Corresponds to the two items in listOfDFs[[1]]
#> 2 LBY3389       2015. Libya  # Corresponds to the one item in listOfDFs[[2]]
#> 3 LBY3393       2015. Libya  
#> 4 LBY3506       2015. Libya  
#> 5 LBY3822       2015. Libya


listOfDFs
#> $`1`
#> # A tibble: 2 x 4
#>   CELL_ID  gwno   POP prio_country
#>     <dbl> <int> <dbl> <chr>       
#> 1 165267.   620  751. Libya       
#> 2 164547.   620  755. Libya       
#> 
#> $`2`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`3`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`4`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`5`
#> # A tibble: 2 x 4
#>   CELL_ID  gwno   POP prio_country
#>     <dbl> <int> <dbl> <chr>       
#> 1 165267.   620  751. Libya       
#> 2 164547.   620  755. Libya

How do I join these two such that they follow the pattern below? Do I have to find a way to bind them on the dataframe's row index or is there a more elegant way?:

我该如何连接这两者,使结果符合以下模式?我是否必须想办法按数据帧的行索引来绑定它们,还是有更优雅的方式?:

#>   EVENT_ID_CNTY  year COUNTRY  CELL_ID  gwno   POP prio_country
#>   <chr>         <dbl> <chr>     <dbl> <int> <dbl> <chr>  
#>   LBY1243       2013. Libya     165267.   620  751. Libya 
#>   LBY1243       2013. Libya     164547.   620  755. Libya 
#>   LBY3389       2015. Libya     172475.   620 11676. Libya
#>   LBY3393       2015. Libya     172475.   620 11676. Libya
#>   LBY3506       2015. Libya     172475.   620 11676. Libya
#>   LBY3822       2015. Libya     165267.   620  751. Libya 
#>   LBY3822       2015. Libya     164547.   620  755. Libya
.........

3 个解决方案

#1


2  

library(tidyr)
# Attach each list element to the row of `points` at the same position as a
# list-column, then expand that column so every nested row repeats the event's
# fields. `mutate()` requires `listOfDFs` to have one element per row of
# `points`; the pairing is purely positional.
# `cols` is named explicitly: calling `unnest()` without it is deprecated since
# tidyr 1.0.0 and raises a warning.
points %>%
  mutate(mm = listOfDFs) %>%
  unnest(cols = mm)

# A tibble: 7 x 7
  EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
  <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
1 LBY1243        2013 Libya    165267   620   751. Libya       
2 LBY1243        2013 Libya    164547   620   755. Libya       
3 LBY3389        2015 Libya    172475   620 11676  Libya       
4 LBY3393        2015 Libya    172475   620 11676  Libya       
5 LBY3506        2015 Libya    172475   620 11676  Libya       
6 LBY3822        2015 Libya    165267   620   751. Libya       
7 LBY3822        2015 Libya    164547   620   755. Libya  

#2


1  

Based on the example, we replicate the rows of the 'points' dataset with the nrow of each element of the list and bind the columns together

基于该示例,我们使用列表中每个元素的nrow复制'points'数据集的行,并将列绑定在一起

library(tidyverse)
# Repeat every row of `points` once per row of the list element at the same
# position, then set the stacked list rows alongside the repeated rows.
points %>%
  slice(rep(seq_len(nrow(points)), times = map_int(listOfDFs, nrow))) %>%
  bind_cols(bind_rows(listOfDFs))
# A tibble: 7 x 7
#EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
#   <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
#1 LBY1243        2013 Libya    165267   620   751. Libya       
#2 LBY1243        2013 Libya    164547   620   755. Libya       
#3 LBY3389        2015 Libya    172475   620 11676  Libya       
#4 LBY3393        2015 Libya    172475   620 11676  Libya       
#5 LBY3506        2015 Libya    172475   620 11676  Libya       
#6 LBY3822        2015 Libya    165267   620   751. Libya       
#7 LBY3822        2015 Libya    164547   620   755. Libya    

Or, written as a single pipe chain:

或者在链中使用

# Pipeline: per-element row counts -> repeated row positions of `points` ->
# the matching rows of `points` -> stacked list columns bound on the right.
# `.` is passed as an explicit argument in each step, so magrittr does not
# also insert it as the first argument.
listOfDFs %>%
  map_int(nrow) %>%
  rep(x = seq_len(nrow(points)), times = .) %>%
  slice(.data = points, .) %>%
  bind_cols(bind_rows(listOfDFs))

Or set the names of the list with 'EVENT_ID_CNTY' to create an id column and join with the 'points'

或者用“EVENT_ID_CNTY”设置列表的名称以创建id列,然后与“points”连接

# Name each list element after the event at the same position in `points`,
# stack the elements with that name stored in an `EVENT_ID_CNTY` column, and
# use that column as the key to pull in the remaining `points` fields.
# The key is spelled out so the join cannot silently include other same-named
# columns and does not print a "Joining with `by = ...`" message.
# The `points` columns end up to the right of the list-element columns.
# NOTE(review): assumes `EVENT_ID_CNTY` is unique in `points`; duplicated IDs
# would multiply rows in the join -- confirm for real data.
listOfDFs %>%
  set_names(points$EVENT_ID_CNTY) %>%
  bind_rows(.id = "EVENT_ID_CNTY") %>%
  left_join(points, by = "EVENT_ID_CNTY")

#3


1  

An option using dplyr::bind_rows and dplyr::inner_join:

使用dplyr::bind_rows和dplyr::inner_join的一种方案:

library(dplyr)

# Give each row of `points` its row number as a character key (`rn`), stack the
# list with each element's name stored in `Name`, match the two keys, and drop
# the helper key afterwards. Works because the list names are "1".."5", i.e.
# the row positions in `points`.
points %>%
  mutate(rn = as.character(row_number())) %>%
  inner_join(
    bind_rows(listOfDFs, .id = "Name"),
    by = c("rn" = "Name")
  ) %>%
  select(-rn)

# # A tibble: 7 x 7
# EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno   POP prio_country
# <chr>         <dbl> <chr>     <dbl> <int> <dbl> <chr>       
# 1 LBY1243        2013 Libya    165267   620   751 Libya       
# 2 LBY1243        2013 Libya    164547   620   755 Libya       
# 3 LBY3389        2015 Libya    172475   620 11676 Libya       
# 4 LBY3393        2015 Libya    172475   620 11676 Libya       
# 5 LBY3506        2015 Libya    172475   620 11676 Libya       
# 6 LBY3822        2015 Libya    165267   620   751 Libya       
# 7 LBY3822        2015 Libya    164547   620   755 Libya

#1


2  

library(tidyr)
# Attach each list element to the row of `points` at the same position as a
# list-column, then expand that column so every nested row repeats the event's
# fields. `mutate()` requires `listOfDFs` to have one element per row of
# `points`; the pairing is purely positional.
# `cols` is named explicitly: calling `unnest()` without it is deprecated since
# tidyr 1.0.0 and raises a warning.
points %>%
  mutate(mm = listOfDFs) %>%
  unnest(cols = mm)

# A tibble: 7 x 7
  EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
  <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
1 LBY1243        2013 Libya    165267   620   751. Libya       
2 LBY1243        2013 Libya    164547   620   755. Libya       
3 LBY3389        2015 Libya    172475   620 11676  Libya       
4 LBY3393        2015 Libya    172475   620 11676  Libya       
5 LBY3506        2015 Libya    172475   620 11676  Libya       
6 LBY3822        2015 Libya    165267   620   751. Libya       
7 LBY3822        2015 Libya    164547   620   755. Libya  

#2


1  

Based on the example, we replicate the rows of the 'points' dataset with the nrow of each element of the list and bind the columns together

基于该示例,我们使用列表中每个元素的nrow复制'points'数据集的行,并将列绑定在一起

library(tidyverse)
# Repeat every row of `points` once per row of the list element at the same
# position, then set the stacked list rows alongside the repeated rows.
points %>%
  slice(rep(seq_len(nrow(points)), times = map_int(listOfDFs, nrow))) %>%
  bind_cols(bind_rows(listOfDFs))
# A tibble: 7 x 7
#EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
#   <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
#1 LBY1243        2013 Libya    165267   620   751. Libya       
#2 LBY1243        2013 Libya    164547   620   755. Libya       
#3 LBY3389        2015 Libya    172475   620 11676  Libya       
#4 LBY3393        2015 Libya    172475   620 11676  Libya       
#5 LBY3506        2015 Libya    172475   620 11676  Libya       
#6 LBY3822        2015 Libya    165267   620   751. Libya       
#7 LBY3822        2015 Libya    164547   620   755. Libya    

Or, written as a single pipe chain:

或者在链中使用

# Pipeline: per-element row counts -> repeated row positions of `points` ->
# the matching rows of `points` -> stacked list columns bound on the right.
# `.` is passed as an explicit argument in each step, so magrittr does not
# also insert it as the first argument.
listOfDFs %>%
  map_int(nrow) %>%
  rep(x = seq_len(nrow(points)), times = .) %>%
  slice(.data = points, .) %>%
  bind_cols(bind_rows(listOfDFs))

Or set the names of the list with 'EVENT_ID_CNTY' to create an id column and join with the 'points'

或者用“EVENT_ID_CNTY”设置列表的名称以创建id列,然后与“points”连接

# Name each list element after the event at the same position in `points`,
# stack the elements with that name stored in an `EVENT_ID_CNTY` column, and
# use that column as the key to pull in the remaining `points` fields.
# The key is spelled out so the join cannot silently include other same-named
# columns and does not print a "Joining with `by = ...`" message.
# The `points` columns end up to the right of the list-element columns.
# NOTE(review): assumes `EVENT_ID_CNTY` is unique in `points`; duplicated IDs
# would multiply rows in the join -- confirm for real data.
listOfDFs %>%
  set_names(points$EVENT_ID_CNTY) %>%
  bind_rows(.id = "EVENT_ID_CNTY") %>%
  left_join(points, by = "EVENT_ID_CNTY")

#3


1  

An option using dplyr::bind_rows and dplyr::inner_join:

使用dplyr::bind_rows和dplyr::inner_join的一种方案:

library(dplyr)

# Give each row of `points` its row number as a character key (`rn`), stack the
# list with each element's name stored in `Name`, match the two keys, and drop
# the helper key afterwards. Works because the list names are "1".."5", i.e.
# the row positions in `points`.
points %>%
  mutate(rn = as.character(row_number())) %>%
  inner_join(
    bind_rows(listOfDFs, .id = "Name"),
    by = c("rn" = "Name")
  ) %>%
  select(-rn)

# # A tibble: 7 x 7
# EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno   POP prio_country
# <chr>         <dbl> <chr>     <dbl> <int> <dbl> <chr>       
# 1 LBY1243        2013 Libya    165267   620   751 Libya       
# 2 LBY1243        2013 Libya    164547   620   755 Libya       
# 3 LBY3389        2015 Libya    172475   620 11676 Libya       
# 4 LBY3393        2015 Libya    172475   620 11676 Libya       
# 5 LBY3506        2015 Libya    172475   620 11676 Libya       
# 6 LBY3822        2015 Libya    165267   620   751 Libya       
# 7 LBY3822        2015 Libya    164547   620   755 Libya