I have the following table:
我有以下表格:
Group Value
---- ----
1 A
1 B
1 C
1 D
2 A
2 B
2 C
For each of the two groups, I want to return all possible combinations of values. For group 1, e.g., the possible combinations are (A,B), (A,C), (A,D), (B,C), (B,D), (C,D), (A,B,C), (B,D,C), (D,C,A), (C,A,B). Analogously, for group 2 they are (A,B), (A,C), (B,C). [Remark: I don't want to consider (1) the combinations with just one value, (2) the combination with all values and (3) the combination with no values. Thus I have 2^n - n - 1 - 1 combinations for n different values.]
对于这两组中的每一组,我希望返回所有可能的值组合。例如,对于第1组,可能的组合是 (A,B)、(A,C)、(A,D)、(B,C)、(B,D)、(C,D)、(A,B,C)、(B,D,C)、(D,C,A)、(C,A,B)。类似地,对于第2组,组合是 (A,B)、(A,C)、(B,C)。[注:我不想考虑 (1) 只有一个值的组合,(2) 包含所有值的组合,以及 (3) 不含任何值的组合。因此对于 n 个不同的值,共有 2^n - n - 1 - 1 种组合。]
I want to list all those combinations with the help of an additional column "Combi". This column numbers the different combinations consecutively.
我想在另外一个专栏“Combi”的帮助下列出所有这些组合。这个列连续地编号不同的组合。
Group Combi Value
---- ---- ----
1 1 A
1 1 B
1 2 A
1 2 C
1 3 A
1 3 D
1 4 B
1 4 C
1 5 B
1 5 D
1 6 C
1 6 D
1 7 A
1 7 B
1 7 C
1 8 B
1 8 C
1 8 D
1 9 C
1 9 D
1 9 A
1 10 D
1 10 A
1 10 B
2 11 A
2 11 B
2 12 A
2 12 C
2 13 B
2 13 C
How do I do this in R?
在R中怎么做呢?
3 个解决方案
#1
2
Here is a general tidyverse solution, should work with sets of values that have more than 3 items.
这里有一个通用的tidyverse解决方案,应该使用具有3个以上项的值集。
The idea is to use combn
(with m = 2 then 3 etc) and format the output as a tibble
for different Group
and m
values. From there we can use tidyverse
functions map_dfr
and unnest
. Finally as we have multiple ids rather than one, we build a table of unique ids, build the unique combi
id and join it back to our result.
其思想是使用 combn(m = 2,然后 3,依此类推),并针对不同的 Group 和 m 值将输出格式化为 tibble。在此基础上,我们可以使用 tidyverse 的函数 map_dfr 和 unnest。最后,由于我们有多个 id 而不是一个,我们构建一个唯一 id 表,生成唯一的 combi id,并将其连接回结果。
# Convenience function: store the size-n combinations of `values` in long format.
#
# n      - number of members per combination
# values - vector of values to combine
#
# Returns a tibble with one row per (combination, member) pair:
#   id    - index of the combination (column of the combn() matrix)
#   Value - a member of that combination
combi_as_tibble <- function(n, values) {
  # combn() returns an n x choose(length(values), n) matrix with one
  # combination per column.
  combos <- combn(values, n)
  tibble(
    id = rep(seq_len(ncol(combos)), each = nrow(combos)),
    Value = c(combos) # column-major flattening keeps each combination together
  )
}
combi_as_tibble(2,letters[1:3]) # example
# # A tibble: 6 x 2
# id Value
# <chr> <chr>
# 1 1 a
# 2 1 b
# 3 2 a
# 4 2 c
# 5 3 b
# 6 3 c
# Build, for one group, a long tibble holding every combination of its distinct
# values that has at least 2 members and fewer than all of them.
# `id2` is the position of the combination size in `sizes`; `id` is the index
# of the combination within that size.
group_combis <- function(values) {
  vals <- unique(values)
  # Sizes 2 .. (n - 1). seq_len() keeps this empty for groups with fewer than
  # 3 distinct values, where 2:(n - 1) would count downwards instead.
  sizes <- seq_len(max(length(vals) - 2L, 0L)) + 1L
  map_dfr(sizes, combi_as_tibble, vals, .id = "id2")
}

df1 %>%
  group_by(Group) %>%
  summarize(combis = list(group_combis(Value))) %>% # one nested tibble per Group
  unnest(combis) %>% # flatten to one row per (combination, member)
  left_join(
    .,
    select(., Group, id2, id) %>%
      distinct() %>%
      mutate(combi = row_number()), # consecutive id per (Group, id2, id)
    by = c("Group", "id2", "id")
  ) %>%
  select(Group, Value, combi) %>%
  as.data.frame()
# Group Value combi
# 1 1 A 1
# 2 1 B 1
# 3 1 A 2
# 4 1 C 2
# 5 1 A 3
# 6 1 D 3
# 7 1 B 4
# 8 1 C 4
# 9 1 B 5
# 10 1 D 5
# 11 1 C 6
# 12 1 D 6
# 13 1 A 7
# 14 1 B 7
# 15 1 C 7
# 16 1 A 8
# 17 1 B 8
# 18 1 D 8
# 19 1 A 9
# 20 1 C 9
# 21 1 D 9
# 22 1 B 10
# 23 1 C 10
# 24 1 D 10
# 25 2 A 11
# 26 2 B 11
# 27 2 A 12
# 28 2 C 12
# 29 2 B 13
# 30 2 C 13
data
数据
# Example input: one row per (Group, Value) pair; Value is kept as character.
df1 <- read.table(text = "Group Value
1 A
1 B
1 C
1 D
2 A
2 B
2 C", header = TRUE, stringsAsFactors = FALSE)
#2
0
A possible solution with data.table
:
使用 data.table 的一种可能的解决方案:
library(data.table)
# For each Group, list every combination of its distinct values that has at
# least 2 members and fewer than all of them, one row per member. `Set` indexes
# the combination within its group; `Combi` then numbers the combinations
# consecutively across groups without relying on the values being sorted
# capital letters.
setDT(dat)[, {
  vals <- unique(Value)
  # Sizes 2 .. (n - 1); empty (rather than counting downwards like 2:(n - 1))
  # when the group has fewer than 3 distinct values.
  sizes <- seq_len(max(length(vals) - 2L, 0L)) + 1L
  combos <- unlist(
    lapply(sizes, function(m) combn(vals, m, simplify = FALSE)),
    recursive = FALSE
  )
  list(
    Set = rep(seq_along(combos), lengths(combos)),
    # vals[0L] keeps the column type when the group yields no combinations
    Value = if (length(combos)) unlist(combos) else vals[0L]
  )
}, by = Group
][, Combi := rleid(Group, Set)][, Set := NULL][]
which gives:
这使:
Group Value Combi 1: 1 A 1 2: 1 B 1 3: 1 A 2 4: 1 C 2 5: 1 A 3 6: 1 D 3 7: 1 B 4 8: 1 C 4 9: 1 B 5 10: 1 D 5 11: 1 C 6 12: 1 D 6 13: 1 A 7 14: 1 B 7 15: 1 C 7 16: 1 A 8 17: 1 B 8 18: 1 D 8 19: 1 A 9 20: 1 C 9 21: 1 D 9 22: 1 B 10 23: 1 C 10 24: 1 D 10 25: 2 A 11 26: 2 B 11 27: 2 A 12 28: 2 C 12 29: 2 B 13 30: 2 C 13
#3
0
Here is a base solution. Comments inline.
这是一个 base R 的解决方案,说明见代码中的注释。
# For each group, create all combinations of its distinct values whose size is
# neither 1 nor the number of distinct values, using combn().
combiLs <- by(dat, dat$Group, function(x) {
  vals <- unique(x$Value)
  # number of elements to choose, excluding 1 and all values
  idx <- seq_along(vals)[-c(1, length(vals))]
  # one list element per combination, across all sizes
  sets <- unlist(
    lapply(idx, function(m) combn(vals, m, simplify = FALSE)),
    recursive = FALSE
  )
  # groups with fewer than 3 distinct values contribute no rows
  if (length(sets) == 0L) {
    return(NULL)
  }
  # Number the combinations consecutively within the group, so that a pair and
  # a triple never share the same Combi label.
  combi <- rep(seq_along(sets), lengths(sets))
  data.frame(
    Group = x$Group[1],
    Combi = paste(x$Group[1], combi, sep = "."),
    Value = unlist(sets)
  )
})
# final output
do.call(rbind, combiLs)
Another possible data.table
implementation using both choose
and combn
simultaneously:
另一种同时使用 choose 和 combn 的 data.table 实现:
# Per Group: `Set` labels each combination as "<Group>.<index within its
# size>", with choose() giving the number of combinations of each size, and
# `Value` lists the members produced by combn().
res <- setDT(dat)[, {
  # combination sizes 2 .. (.N - 1); empty for groups with fewer than 3 rows
  idx <- seq_along(Value)[-c(1, .N)]
  ids <- unlist(lapply(idx, function(m) rep(seq_len(choose(.N, m)), each = m)))
  vals <- unlist(lapply(idx, function(m) as.vector(combn(Value, m))))
  list(
    # Guard the empty case: paste0(Group[1], ".", NULL) would otherwise yield a
    # spurious "<Group>." label next to a NULL Value column.
    Set = if (length(ids)) paste0(Group[1], ".", ids) else character(0),
    Value = if (length(vals)) vals else Value[0L]
  )
}, by = Group]
# Set labels restart for every size and group, so consecutive runs of equal
# labels identify one combination each; rleid() numbers those runs 1, 2, ...
res[, Combi := rleid(Set)]
data:
数据:
# Example input: group 1 holds the values A-D, group 2 the values A-C.
dat <- data.frame(
  Group = rep(c(1, 2), times = c(4, 3)),
  Value = c(LETTERS[1:4], LETTERS[1:3])
)
dat
#1
2
Here is a general tidyverse solution, should work with sets of values that have more than 3 items.
这里有一个通用的tidyverse解决方案,应该使用具有3个以上项的值集。
The idea is to use combn
(with m = 2 then 3 etc) and format the output as a tibble
for different Group
and m
values. From there we can use tidyverse
functions map_dfr
and unnest
. Finally as we have multiple ids rather than one, we build a table of unique ids, build the unique combi
id and join it back to our result.
其思想是使用 combn(m = 2,然后 3,依此类推),并针对不同的 Group 和 m 值将输出格式化为 tibble。在此基础上,我们可以使用 tidyverse 的函数 map_dfr 和 unnest。最后,由于我们有多个 id 而不是一个,我们构建一个唯一 id 表,生成唯一的 combi id,并将其连接回结果。
# Convenience function: store the size-n combinations of `values` in long format.
#
# n      - number of members per combination
# values - vector of values to combine
#
# Returns a tibble with one row per (combination, member) pair:
#   id    - index of the combination (column of the combn() matrix)
#   Value - a member of that combination
combi_as_tibble <- function(n, values) {
  # combn() returns an n x choose(length(values), n) matrix with one
  # combination per column.
  combos <- combn(values, n)
  tibble(
    id = rep(seq_len(ncol(combos)), each = nrow(combos)),
    Value = c(combos) # column-major flattening keeps each combination together
  )
}
combi_as_tibble(2,letters[1:3]) # example
# # A tibble: 6 x 2
# id Value
# <chr> <chr>
# 1 1 a
# 2 1 b
# 3 2 a
# 4 2 c
# 5 3 b
# 6 3 c
# Build, for one group, a long tibble holding every combination of its distinct
# values that has at least 2 members and fewer than all of them.
# `id2` is the position of the combination size in `sizes`; `id` is the index
# of the combination within that size.
group_combis <- function(values) {
  vals <- unique(values)
  # Sizes 2 .. (n - 1). seq_len() keeps this empty for groups with fewer than
  # 3 distinct values, where 2:(n - 1) would count downwards instead.
  sizes <- seq_len(max(length(vals) - 2L, 0L)) + 1L
  map_dfr(sizes, combi_as_tibble, vals, .id = "id2")
}

df1 %>%
  group_by(Group) %>%
  summarize(combis = list(group_combis(Value))) %>% # one nested tibble per Group
  unnest(combis) %>% # flatten to one row per (combination, member)
  left_join(
    .,
    select(., Group, id2, id) %>%
      distinct() %>%
      mutate(combi = row_number()), # consecutive id per (Group, id2, id)
    by = c("Group", "id2", "id")
  ) %>%
  select(Group, Value, combi) %>%
  as.data.frame()
# Group Value combi
# 1 1 A 1
# 2 1 B 1
# 3 1 A 2
# 4 1 C 2
# 5 1 A 3
# 6 1 D 3
# 7 1 B 4
# 8 1 C 4
# 9 1 B 5
# 10 1 D 5
# 11 1 C 6
# 12 1 D 6
# 13 1 A 7
# 14 1 B 7
# 15 1 C 7
# 16 1 A 8
# 17 1 B 8
# 18 1 D 8
# 19 1 A 9
# 20 1 C 9
# 21 1 D 9
# 22 1 B 10
# 23 1 C 10
# 24 1 D 10
# 25 2 A 11
# 26 2 B 11
# 27 2 A 12
# 28 2 C 12
# 29 2 B 13
# 30 2 C 13
data
数据
# Example input: one row per (Group, Value) pair; Value is kept as character.
df1 <- read.table(text = "Group Value
1 A
1 B
1 C
1 D
2 A
2 B
2 C", header = TRUE, stringsAsFactors = FALSE)
#2
0
A possible solution with data.table
:
使用 data.table 的一种可能的解决方案:
library(data.table)
# For each Group, list every combination of its distinct values that has at
# least 2 members and fewer than all of them, one row per member. `Set` indexes
# the combination within its group; `Combi` then numbers the combinations
# consecutively across groups without relying on the values being sorted
# capital letters.
setDT(dat)[, {
  vals <- unique(Value)
  # Sizes 2 .. (n - 1); empty (rather than counting downwards like 2:(n - 1))
  # when the group has fewer than 3 distinct values.
  sizes <- seq_len(max(length(vals) - 2L, 0L)) + 1L
  combos <- unlist(
    lapply(sizes, function(m) combn(vals, m, simplify = FALSE)),
    recursive = FALSE
  )
  list(
    Set = rep(seq_along(combos), lengths(combos)),
    # vals[0L] keeps the column type when the group yields no combinations
    Value = if (length(combos)) unlist(combos) else vals[0L]
  )
}, by = Group
][, Combi := rleid(Group, Set)][, Set := NULL][]
which gives:
这使:
Group Value Combi 1: 1 A 1 2: 1 B 1 3: 1 A 2 4: 1 C 2 5: 1 A 3 6: 1 D 3 7: 1 B 4 8: 1 C 4 9: 1 B 5 10: 1 D 5 11: 1 C 6 12: 1 D 6 13: 1 A 7 14: 1 B 7 15: 1 C 7 16: 1 A 8 17: 1 B 8 18: 1 D 8 19: 1 A 9 20: 1 C 9 21: 1 D 9 22: 1 B 10 23: 1 C 10 24: 1 D 10 25: 2 A 11 26: 2 B 11 27: 2 A 12 28: 2 C 12 29: 2 B 13 30: 2 C 13
#3
0
Here is a base solution. Comments inline.
这是一个 base R 的解决方案,说明见代码中的注释。
# For each group, create all combinations of its distinct values whose size is
# neither 1 nor the number of distinct values, using combn().
combiLs <- by(dat, dat$Group, function(x) {
  vals <- unique(x$Value)
  # number of elements to choose, excluding 1 and all values
  idx <- seq_along(vals)[-c(1, length(vals))]
  # one list element per combination, across all sizes
  sets <- unlist(
    lapply(idx, function(m) combn(vals, m, simplify = FALSE)),
    recursive = FALSE
  )
  # groups with fewer than 3 distinct values contribute no rows
  if (length(sets) == 0L) {
    return(NULL)
  }
  # Number the combinations consecutively within the group, so that a pair and
  # a triple never share the same Combi label.
  combi <- rep(seq_along(sets), lengths(sets))
  data.frame(
    Group = x$Group[1],
    Combi = paste(x$Group[1], combi, sep = "."),
    Value = unlist(sets)
  )
})
# final output
do.call(rbind, combiLs)
Another possible data.table
implementation using both choose
and combn
simultaneously:
另一种同时使用 choose 和 combn 的 data.table 实现:
# Per Group: `Set` labels each combination as "<Group>.<index within its
# size>", with choose() giving the number of combinations of each size, and
# `Value` lists the members produced by combn().
res <- setDT(dat)[, {
  # combination sizes 2 .. (.N - 1); empty for groups with fewer than 3 rows
  idx <- seq_along(Value)[-c(1, .N)]
  ids <- unlist(lapply(idx, function(m) rep(seq_len(choose(.N, m)), each = m)))
  vals <- unlist(lapply(idx, function(m) as.vector(combn(Value, m))))
  list(
    # Guard the empty case: paste0(Group[1], ".", NULL) would otherwise yield a
    # spurious "<Group>." label next to a NULL Value column.
    Set = if (length(ids)) paste0(Group[1], ".", ids) else character(0),
    Value = if (length(vals)) vals else Value[0L]
  )
}, by = Group]
# Set labels restart for every size and group, so consecutive runs of equal
# labels identify one combination each; rleid() numbers those runs 1, 2, ...
res[, Combi := rleid(Set)]
data:
数据:
# Example input: group 1 holds the values A-D, group 2 the values A-C.
dat <- data.frame(
  Group = rep(c(1, 2), times = c(4, 3)),
  Value = c(LETTERS[1:4], LETTERS[1:3])
)
dat