Question
I find myself using reshape2::melt
to obtain a single "long" data.frame
from a hierarchical list
of data.frame
objects. However, the column names of the result have the list hierarchy levels labeled as "L1", "L2", etc. Since those levels have meaning, I want to give those columns meaningful names instead. What's the best way to do this? Can it be done using a single call to melt
?
我发现自己经常使用 reshape2::melt 从由 data.frame 对象组成的分层列表中获取单个“长格式”data.frame。但是,结果的列名把列表的层级标记为“L1”、“L2”等。由于这些层级是有含义的,我想给这些列起有意义的名称。最好的方法是什么?能否只通过一次 melt 调用来完成?
I am not wed to melt
or reshape2
, so I am open to other approaches or packages.
我并不执着于 melt 或 reshape2,因此也愿意接受其他方法或包。
Current setup
Let's suppose we have a hierarchical list of data.frame
objects such as this:
假设我们有一个data.frame对象的分层列表,例如:
library(reshape2)
# Two-level nested list used as the running example: the outer names are the
# cylinder groups, the inner names are the gear groups, and every leaf is a
# two-row data frame with integer columns `mpg` and `qsec`.
x <- list(
  cyl_6 = list(
    gear_3 = data.frame(mpg = 1:2, qsec = 3:4),
    gear_4 = data.frame(mpg = 5:6, qsec = 7:8)
  ),
  cyl_8 = list(
    gear_3 = data.frame(mpg = 9:10, qsec = 11:12),
    gear_4 = data.frame(mpg = 13:14, qsec = 15:16)
  )
)
When I use melt(x)
, I get the column name of "L1" for the cylinder count and "L2" for the gear count. I would like the column to say "cylinders
" and "gears
", respectively, instead.
当我使用 melt(x) 时,气缸数对应的列名是“L1”,档位数对应的列名是“L2”。我希望这两列分别命名为“cylinders”和“gears”。
mx <- melt(x)
Here's the output of head(mx)
. I do not want it to just say "L1" and "L2":
这是head(mx)的输出。我不希望它只是说“L1”和“L2”:
1> head(mx)
variable value L2 L1
1 mpg 1 gear_3 cyl_6
2 mpg 2 gear_3 cyl_6
3 qsec 3 gear_3 cyl_6
4 qsec 4 gear_3 cyl_6
5 mpg 5 gear_4 cyl_6
6 mpg 6 gear_4 cyl_6
So, I resort to setting "L1" and "L2" manually:
所以,我手动设置“L1”和“L2”:
names(mx)[3:4] <- c("gears", "cylinders")
Desired output
Here's the desired final column name setup. I would like to be able to achieve this without resetting the "names" of mx manually as a separate step.
这是所需的最终列名称设置。我希望能够实现这一点,而无需手动重置mx的“名称”作为单独的步骤。
1> head(mx)
variable value gears cylinders
1 mpg 1 gear_3 cyl_6
2 mpg 2 gear_3 cyl_6
3 qsec 3 gear_3 cyl_6
4 qsec 4 gear_3 cyl_6
5 mpg 5 gear_4 cyl_6
6 mpg 6 gear_4 cyl_6
2 个解决方案
#1
3
You could try
你可以试试
library(tidyr)
# Unnest the list `x`, storing the list names in a new `sex` column.
# NOTE(review): the printed output below (sex / Hair / Eye / value) does not
# match the `x` defined in the question; presumably this snippet answers an
# earlier version of the question — confirm before reusing it.
res <- unnest(x, sex)
head(res)
# sex Hair Eye value
#1 MALE Black Brown 32
#2 MALE Brown Brown 53
#3 MALE Red Brown 10
#4 MALE Blond Brown 3
#5 MALE Black Blue 11
#6 MALE Brown Blue 50
For the updated question, it is not any easier with unnest
. Repeated calls to unnest
are needed
对于更新后的问题,使用 unnest 并不会更简单,需要多次调用 unnest。
library(dplyr)
# Inner unnest(): applied to each cylinder group, with `gear` as the name of
# the column holding the inner list names. Outer unnest(): applied to the
# resulting list, with `cylinders` as the column for the outer list names.
# gather() then turns the mpg..qsec columns into variable/value pairs.
# NOTE(review): this relies on unnest() accepting a plain list plus an id
# column name — presumably a legacy tidyr interface; confirm against the
# installed tidyr version. gather() is superseded by pivot_longer().
dN <- unnest(lapply(x, unnest, gear), cylinders) %>%
gather(variable, value, mpg:qsec)
head(dN,3)
# cylinders gear variable value
#1 cyl_6 gear_3 mpg 1
#2 cyl_6 gear_3 mpg 2
#3 cyl_6 gear_4 mpg 5
#2
2
library('reshape2')
# Two-level nested list used as the running example: the outer names are the
# cylinder groups, the inner names are the gear groups, and every leaf is a
# two-row data frame with integer columns `mpg` and `qsec`.
x <- list(
  cyl_6 = list(
    gear_3 = data.frame(mpg = 1:2, qsec = 3:4),
    gear_4 = data.frame(mpg = 5:6, qsec = 7:8)
  ),
  cyl_8 = list(
    gear_3 = data.frame(mpg = 9:10, qsec = 11:12),
    gear_4 = data.frame(mpg = 13:14, qsec = 15:16)
  )
)
Looking at the source code, the Ls are hard-coded, so you can't change them on the fly without doing an extra step like you're doing now.
查看源代码可知,这些“L”列名是硬编码的,因此除非像您现在这样多做一步,否则无法在调用时直接更改它们。
reshape2:::melt.list
# function (data, ..., level = 1)
# {
# parts <- lapply(data, melt, level = level + 1, ...)
# result <- rbind.fill(parts)
# names <- names(data) %||% seq_along(data)
# lengths <- vapply(parts, nrow, integer(1))
# labels <- rep(names, lengths)
# label_var <- attr(data, "varname") %||% paste("L", level, sep = "")
# result[[label_var]] <- labels
# result
# }
What you could do is change this function slightly and add a new argument so you can name them whatever you want:
您可以做的是稍微更改此函数并添加一个新参数,以便您可以随意命名它们:
# Melt a (possibly nested) list like reshape2:::melt.list does, with the
# option of renaming the generated level columns ("L1", "L2", ...).
#
# Arguments:
#   data   A list whose elements can be melted by reshape2::melt().
#   ...    Forwarded unchanged to reshape2::melt() for every element.
#   level  Depth of `data`; used to build the default label column "L<level>".
#   nn     Optional character vector with one name per generated level column.
#          Names are applied in the column order of the result, i.e. deepest
#          level first (for a two-level list: c(<name for L2>, <name for L1>)).
#
# Returns the melted data frame. Signals an error when `nn` does not contain
# exactly one name per level column, instead of silently recycling names.
meltList <- function (data, ..., level = 1, nn) {
  '%||%' <- function (a, b) if (!is.null(a)) a else b
  # Call melt through its namespace rather than require(): require() only
  # returns FALSE when the package is missing and attaches it as a side effect.
  parts <- lapply(data, reshape2::melt, level = level + 1, ...)
  result <- plyr::rbind.fill(parts)
  part_labels <- names(data) %||% seq_along(data)
  part_rows <- vapply(parts, nrow, integer(1))
  label_var <- attr(data, "varname") %||% paste0("L", level)
  # One label per row, repeated for as many rows as each part contributed.
  result[[label_var]] <- rep(part_labels, part_rows)
  if (!missing(nn)) {
    # Anchor both ends so only "L<digits>" columns are treated as level
    # columns; a data column such as "L2norm" must keep its name.
    level_cols <- grep('^L\\d+$', names(result))
    if (length(nn) != length(level_cols)) {
      stop("`nn` must supply exactly one name per level column (",
           length(level_cols), " found, ", length(nn), " given)",
           call. = FALSE)
    }
    names(result)[level_cols] <- nn
  }
  result
}
Example:
示例:
head(meltList(x, nn = c('gears','cylinders')))
# variable value gears cylinders
# 1 mpg 1 gear_3 cyl_6
# 2 mpg 2 gear_3 cyl_6
# 3 qsec 3 gear_3 cyl_6
# 4 qsec 4 gear_3 cyl_6
# 5 mpg 5 gear_4 cyl_6
# 6 mpg 6 gear_4 cyl_6
This will also work on more nested lists:
这也适用于更多嵌套列表:
y <- list(x = x, y = x)
head(melt(y))
# variable value L3 L2 L1
# 1 mpg 1 gear_3 cyl_6 x
# 2 mpg 2 gear_3 cyl_6 x
# 3 qsec 3 gear_3 cyl_6 x
# 4 qsec 4 gear_3 cyl_6 x
# 5 mpg 5 gear_4 cyl_6 x
# 6 mpg 6 gear_4 cyl_6 x
vs
VS
# Name the outermost level 'dataset' rather than 'variable': melt already
# produces a `variable` column, and duplicate column names make the result
# ambiguous to subset by name.
head(meltList(y, nn = c('gears','cylinders','dataset')))
# variable value gears cylinders dataset
# 1 mpg 1 gear_3 cyl_6 x
# 2 mpg 2 gear_3 cyl_6 x
# 3 qsec 3 gear_3 cyl_6 x
# 4 qsec 4 gear_3 cyl_6 x
# 5 mpg 5 gear_4 cyl_6 x
# 6 mpg 6 gear_4 cyl_6 x
#1
3
You could try
你可以试试
library(tidyr)
# Unnest the list `x`, storing the list names in a new `sex` column.
# NOTE(review): the printed output below (sex / Hair / Eye / value) does not
# match the `x` defined in the question; presumably this snippet answers an
# earlier version of the question — confirm before reusing it.
res <- unnest(x, sex)
head(res)
# sex Hair Eye value
#1 MALE Black Brown 32
#2 MALE Brown Brown 53
#3 MALE Red Brown 10
#4 MALE Blond Brown 3
#5 MALE Black Blue 11
#6 MALE Brown Blue 50
For the updated question, it is not any easier with unnest
. Repeated calls to unnest
are needed
对于更新后的问题,使用 unnest 并不会更简单,需要多次调用 unnest。
library(dplyr)
# Inner unnest(): applied to each cylinder group, with `gear` as the name of
# the column holding the inner list names. Outer unnest(): applied to the
# resulting list, with `cylinders` as the column for the outer list names.
# gather() then turns the mpg..qsec columns into variable/value pairs.
# NOTE(review): this relies on unnest() accepting a plain list plus an id
# column name — presumably a legacy tidyr interface; confirm against the
# installed tidyr version. gather() is superseded by pivot_longer().
dN <- unnest(lapply(x, unnest, gear), cylinders) %>%
gather(variable, value, mpg:qsec)
head(dN,3)
# cylinders gear variable value
#1 cyl_6 gear_3 mpg 1
#2 cyl_6 gear_3 mpg 2
#3 cyl_6 gear_4 mpg 5
#2
2
library('reshape2')
# Two-level nested list used as the running example: the outer names are the
# cylinder groups, the inner names are the gear groups, and every leaf is a
# two-row data frame with integer columns `mpg` and `qsec`.
x <- list(
  cyl_6 = list(
    gear_3 = data.frame(mpg = 1:2, qsec = 3:4),
    gear_4 = data.frame(mpg = 5:6, qsec = 7:8)
  ),
  cyl_8 = list(
    gear_3 = data.frame(mpg = 9:10, qsec = 11:12),
    gear_4 = data.frame(mpg = 13:14, qsec = 15:16)
  )
)
Looking at the source code, the Ls are hard-coded, so you can't change them on the fly without doing an extra step like you're doing now.
查看源代码可知,这些“L”列名是硬编码的,因此除非像您现在这样多做一步,否则无法在调用时直接更改它们。
reshape2:::melt.list
# function (data, ..., level = 1)
# {
# parts <- lapply(data, melt, level = level + 1, ...)
# result <- rbind.fill(parts)
# names <- names(data) %||% seq_along(data)
# lengths <- vapply(parts, nrow, integer(1))
# labels <- rep(names, lengths)
# label_var <- attr(data, "varname") %||% paste("L", level, sep = "")
# result[[label_var]] <- labels
# result
# }
What you could do is change this function slightly and add a new argument so you can name them whatever you want:
您可以做的是稍微更改此函数并添加一个新参数,以便您可以随意命名它们:
# Melt a (possibly nested) list like reshape2:::melt.list does, with the
# option of renaming the generated level columns ("L1", "L2", ...).
#
# Arguments:
#   data   A list whose elements can be melted by reshape2::melt().
#   ...    Forwarded unchanged to reshape2::melt() for every element.
#   level  Depth of `data`; used to build the default label column "L<level>".
#   nn     Optional character vector with one name per generated level column.
#          Names are applied in the column order of the result, i.e. deepest
#          level first (for a two-level list: c(<name for L2>, <name for L1>)).
#
# Returns the melted data frame. Signals an error when `nn` does not contain
# exactly one name per level column, instead of silently recycling names.
meltList <- function (data, ..., level = 1, nn) {
  '%||%' <- function (a, b) if (!is.null(a)) a else b
  # Call melt through its namespace rather than require(): require() only
  # returns FALSE when the package is missing and attaches it as a side effect.
  parts <- lapply(data, reshape2::melt, level = level + 1, ...)
  result <- plyr::rbind.fill(parts)
  part_labels <- names(data) %||% seq_along(data)
  part_rows <- vapply(parts, nrow, integer(1))
  label_var <- attr(data, "varname") %||% paste0("L", level)
  # One label per row, repeated for as many rows as each part contributed.
  result[[label_var]] <- rep(part_labels, part_rows)
  if (!missing(nn)) {
    # Anchor both ends so only "L<digits>" columns are treated as level
    # columns; a data column such as "L2norm" must keep its name.
    level_cols <- grep('^L\\d+$', names(result))
    if (length(nn) != length(level_cols)) {
      stop("`nn` must supply exactly one name per level column (",
           length(level_cols), " found, ", length(nn), " given)",
           call. = FALSE)
    }
    names(result)[level_cols] <- nn
  }
  result
}
Example:
示例:
head(meltList(x, nn = c('gears','cylinders')))
# variable value gears cylinders
# 1 mpg 1 gear_3 cyl_6
# 2 mpg 2 gear_3 cyl_6
# 3 qsec 3 gear_3 cyl_6
# 4 qsec 4 gear_3 cyl_6
# 5 mpg 5 gear_4 cyl_6
# 6 mpg 6 gear_4 cyl_6
This will also work on more nested lists:
这也适用于更多嵌套列表:
y <- list(x = x, y = x)
head(melt(y))
# variable value L3 L2 L1
# 1 mpg 1 gear_3 cyl_6 x
# 2 mpg 2 gear_3 cyl_6 x
# 3 qsec 3 gear_3 cyl_6 x
# 4 qsec 4 gear_3 cyl_6 x
# 5 mpg 5 gear_4 cyl_6 x
# 6 mpg 6 gear_4 cyl_6 x
vs
VS
# Name the outermost level 'dataset' rather than 'variable': melt already
# produces a `variable` column, and duplicate column names make the result
# ambiguous to subset by name.
head(meltList(y, nn = c('gears','cylinders','dataset')))
# variable value gears cylinders dataset
# 1 mpg 1 gear_3 cyl_6 x
# 2 mpg 2 gear_3 cyl_6 x
# 3 qsec 3 gear_3 cyl_6 x
# 4 qsec 4 gear_3 cyl_6 x
# 5 mpg 5 gear_4 cyl_6 x
# 6 mpg 6 gear_4 cyl_6 x