按多个列的级别计数

时间:2021-12-24 14:04:17

This is the extension here.
Data looks like:

这是这里的扩展。数据看起来像:

ID   Type    Problem1    Value1     Problem2    Value2    Problem3    Value3
1    A       X           500        Y           1000      Z           400
2    A       X           600        Z           700       
3    B       Y           700        Z           100
4    B       W           200        V           200
5    C       Z           500        V           500       
6    C       X           1000       W           100       V           900

The result I want is:

我想要的结果是:

Type    X     Y     Z     W     V
A       2     1     2     0     0  
B       0     1     1     1     1
C       1     0     1     1     2

I want to count the number under each groups, how can I do?

我想算一下每组人数,我该怎么办?

# data
dt <- fread("
ID   Type    Problem1    Value1     Problem2    Value2    Problem3    Value3
1    A       X           500        Y           1000      Z           400
2    A       X           600        Z           700       
3    B       Y           700        Z           100
4    B       W           200        V           200
5    C       Z           500        V           500       
6    C       X           1000       W           100       V           900", fill = T)  

I try this:

我试试这个:

dcast(melt(dt, measure = patterns("^Value", "^Problem"), 
        value.name = c("Value", "Problem"))[Problem != ""
      ][, Problem := factor(Problem, levels = c("X", "Y", "Z", "W", "V"))], 
       Type ~Problem, value.var = "Value", sum / mean, na.rm = TRUE)

However, it got error.

但是,它有错误。

3 个解决方案

#1


2  

I'd store it in long form (similar to the OP's attempt)...

我会以长篇形式存储它(类似于OP的尝试)......

dt[Problem3 == "", Problem3 := NA]
mDT = melt(dt, 
  id = c("ID", "Type"), 
  meas = patterns("Problem", "Value"), 
  variable.name = "Item",
  value.name = c("Problem", "Value"),
  na.rm = TRUE
)

    ID Type Item Problem Value
 1:  1    A    1       X   500
 2:  2    A    1       X   600
 3:  3    B    1       Y   700
 4:  4    B    1       W   200
 5:  5    C    1       Z   500
 6:  6    C    1       X  1000
 7:  1    A    2       Y  1000
 8:  2    A    2       Z   700
 9:  3    B    2       Z   100
10:  4    B    2       V   200
11:  5    C    2       V   500
12:  6    C    2       W   100
13:  1    A    3       Z   400
14:  6    C    3       V   900

Then it's just casting to wide:

然后它只是投射到宽:

dcast(mDT, Type ~ Problem, fun.agg = length)

   Type V W X Y Z
1:    A 0 0 2 1 2
2:    B 1 1 0 1 1
3:    C 2 1 1 0 1

If you want the columns in a particular order or want to include unobserved levels of Problem, you can use a factor (as the OP did):

如果您希望列按特定顺序或希望包含未观察到的问题级别,则可以使用因子(如OP所做的那样):

dcast(mDT, Type ~ factor(Problem, levels=c("X","Y","Z","W","V")), fun.agg = length)

# or more permanently

mDT[, Problem := factor(Problem, levels=c("X","Y","Z","W","V"))]
dcast(mDT, Type ~ Problem, fun.agg = length)

#2


1  

You can try a dplyr solution:

您可以尝试使用dplyr解决方案:

library(tidyverse)
dt %>% 
  select(Type, starts_with("Problem")) %>% 
  gather(key, value, -Type) %>% 
  group_by(Type) %>% 
  filter(!value=="") %>% 
  count(value) %>% 
  spread(value, n, fill = 0)
# A tibble: 3 x 6
# Groups:   Type [3]
   Type     V     W     X     Y     Z
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1     A     0     0     2     1     2
2     B     1     1     0     1     1
3     C     2     1     1     0     1

#3


1  

# data
dt <- fread("
            ID   Type    Problem1    Value1     Problem2    Value2    Problem3    Value3
            1    A       X           500        Y           1000      Z           400
            2    A       X           600        Z           700       
            3    B       Y           700        Z           100
            4    B       W           200        V           200
            5    C       Z           500        V           500       
            6    C       X           1000       W           100       V           900", fill = T) 

dt <- dt[,c("Type", "Problem1", "Problem2", "Problem3")]
dt <- melt(dt, id = "Type")
cnt <- dt[,.(freq = .N), by = .(Type, value)]

mat <- reshape(cnt, idvar = "Type", timevar = "value", direction = "wide")
> mat
    Type freq.X freq.Y freq.W freq.Z freq.V freq.
1:    A      2      1     NA      2     NA     1
2:    B     NA      1      1      1      1     2
3:    C      1     NA      1      1      2     1

#1


2  

I'd store it in long form (similar to the OP's attempt)...

我会以长篇形式存储它(类似于OP的尝试)......

dt[Problem3 == "", Problem3 := NA]
mDT = melt(dt, 
  id = c("ID", "Type"), 
  meas = patterns("Problem", "Value"), 
  variable.name = "Item",
  value.name = c("Problem", "Value"),
  na.rm = TRUE
)

    ID Type Item Problem Value
 1:  1    A    1       X   500
 2:  2    A    1       X   600
 3:  3    B    1       Y   700
 4:  4    B    1       W   200
 5:  5    C    1       Z   500
 6:  6    C    1       X  1000
 7:  1    A    2       Y  1000
 8:  2    A    2       Z   700
 9:  3    B    2       Z   100
10:  4    B    2       V   200
11:  5    C    2       V   500
12:  6    C    2       W   100
13:  1    A    3       Z   400
14:  6    C    3       V   900

Then it's just casting to wide:

然后它只是投射到宽:

dcast(mDT, Type ~ Problem, fun.agg = length)

   Type V W X Y Z
1:    A 0 0 2 1 2
2:    B 1 1 0 1 1
3:    C 2 1 1 0 1

If you want the columns in a particular order or want to include unobserved levels of Problem, you can use a factor (as the OP did):

如果您希望列按特定顺序或希望包含未观察到的问题级别,则可以使用因子(如OP所做的那样):

dcast(mDT, Type ~ factor(Problem, levels=c("X","Y","Z","W","V")), fun.agg = length)

# or more permanently

mDT[, Problem := factor(Problem, levels=c("X","Y","Z","W","V"))]
dcast(mDT, Type ~ Problem, fun.agg = length)

#2


1  

You can try a dplyr solution:

您可以尝试使用dplyr解决方案:

library(tidyverse)
dt %>% 
  select(Type, starts_with("Problem")) %>% 
  gather(key, value, -Type) %>% 
  group_by(Type) %>% 
  filter(!value=="") %>% 
  count(value) %>% 
  spread(value, n, fill = 0)
# A tibble: 3 x 6
# Groups:   Type [3]
   Type     V     W     X     Y     Z
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1     A     0     0     2     1     2
2     B     1     1     0     1     1
3     C     2     1     1     0     1

#3


1  

# data
dt <- fread("
            ID   Type    Problem1    Value1     Problem2    Value2    Problem3    Value3
            1    A       X           500        Y           1000      Z           400
            2    A       X           600        Z           700       
            3    B       Y           700        Z           100
            4    B       W           200        V           200
            5    C       Z           500        V           500       
            6    C       X           1000       W           100       V           900", fill = T) 

dt <- dt[,c("Type", "Problem1", "Problem2", "Problem3")]
dt <- melt(dt, id = "Type")
cnt <- dt[,.(freq = .N), by = .(Type, value)]

mat <- reshape(cnt, idvar = "Type", timevar = "value", direction = "wide")
> mat
    Type freq.X freq.Y freq.W freq.Z freq.V freq.
1:    A      2      1     NA      2     NA     1
2:    B     NA      1      1      1      1     2
3:    C      1     NA      1      1      2     1