This is the extension here.
Data looks like:
这是这里的扩展。数据看起来像:
ID Type Problem1 Value1 Problem2 Value2 Problem3 Value3
1 A X 500 Y 1000 Z 400
2 A X 600 Z 700
3 B Y 700 Z 100
4 B W 200 V 200
5 C Z 500 V 500
6 C X 1000 W 100 V 900
The result I want is:
我想要的结果是:
Type X Y Z W V
A 2 1 2 0 0
B 0 1 1 1 1
C 1 0 1 1 2
I want to count the number under each groups, how can I do?
我想算一下每组人数,我该怎么办?
# data
dt <- fread("
ID Type Problem1 Value1 Problem2 Value2 Problem3 Value3
1 A X 500 Y 1000 Z 400
2 A X 600 Z 700
3 B Y 700 Z 100
4 B W 200 V 200
5 C Z 500 V 500
6 C X 1000 W 100 V 900", fill = T)
I try this:
我试试这个:
dcast(melt(dt, measure = patterns("^Value", "^Problem"),
value.name = c("Value", "Problem"))[Problem != ""
][, Problem := factor(Problem, levels = c("X", "Y", "Z", "W", "V"))],
Type ~Problem, value.var = "Value", sum / mean, na.rm = TRUE)
However, it got error.
但是,它有错误。
3 个解决方案
#1
2
I'd store it in long form (similar to the OP's attempt)...
我会以长篇形式存储它(类似于OP的尝试)......
dt[Problem3 == "", Problem3 := NA]
mDT = melt(dt,
id = c("ID", "Type"),
meas = patterns("Problem", "Value"),
variable.name = "Item",
value.name = c("Problem", "Value"),
na.rm = TRUE
)
ID Type Item Problem Value
1: 1 A 1 X 500
2: 2 A 1 X 600
3: 3 B 1 Y 700
4: 4 B 1 W 200
5: 5 C 1 Z 500
6: 6 C 1 X 1000
7: 1 A 2 Y 1000
8: 2 A 2 Z 700
9: 3 B 2 Z 100
10: 4 B 2 V 200
11: 5 C 2 V 500
12: 6 C 2 W 100
13: 1 A 3 Z 400
14: 6 C 3 V 900
Then it's just casting to wide:
然后它只是投射到宽:
dcast(mDT, Type ~ Problem, fun.agg = length)
Type V W X Y Z
1: A 0 0 2 1 2
2: B 1 1 0 1 1
3: C 2 1 1 0 1
If you want the columns in a particular order or want to include unobserved levels of Problem
, you can use a factor (as the OP did):
如果您希望列按特定顺序或希望包含未观察到的问题级别,则可以使用因子(如OP所做的那样):
dcast(mDT, Type ~ factor(Problem, levels=c("X","Y","Z","W","V")), fun.agg = length)
# or more permanently
mDT[, Problem := factor(Problem, levels=c("X","Y","Z","W","V"))]
dcast(mDT, Type ~ Problem, fun.agg = length)
#2
1
You can try a dplyr
solution:
您可以尝试使用dplyr解决方案:
library(tidyverse)
dt %>%
select(Type, starts_with("Problem")) %>%
gather(key, value, -Type) %>%
group_by(Type) %>%
filter(!value=="") %>%
count(value) %>%
spread(value, n, fill = 0)
# A tibble: 3 x 6
# Groups: Type [3]
Type V W X Y Z
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 0 0 2 1 2
2 B 1 1 0 1 1
3 C 2 1 1 0 1
#3
1
# data
dt <- fread("
ID Type Problem1 Value1 Problem2 Value2 Problem3 Value3
1 A X 500 Y 1000 Z 400
2 A X 600 Z 700
3 B Y 700 Z 100
4 B W 200 V 200
5 C Z 500 V 500
6 C X 1000 W 100 V 900", fill = T)
dt <- dt[,c("Type", "Problem1", "Problem2", "Problem3")]
dt <- melt(dt, id = "Type")
cnt <- dt[,.(freq = .N), by = .(Type, value)]
mat <- reshape(cnt, idvar = "Type", timevar = "value", direction = "wide")
> mat
Type freq.X freq.Y freq.W freq.Z freq.V freq.
1: A 2 1 NA 2 NA 1
2: B NA 1 1 1 1 2
3: C 1 NA 1 1 2 1
#1
2
I'd store it in long form (similar to the OP's attempt)...
我会以长篇形式存储它(类似于OP的尝试)......
dt[Problem3 == "", Problem3 := NA]
mDT = melt(dt,
id = c("ID", "Type"),
meas = patterns("Problem", "Value"),
variable.name = "Item",
value.name = c("Problem", "Value"),
na.rm = TRUE
)
ID Type Item Problem Value
1: 1 A 1 X 500
2: 2 A 1 X 600
3: 3 B 1 Y 700
4: 4 B 1 W 200
5: 5 C 1 Z 500
6: 6 C 1 X 1000
7: 1 A 2 Y 1000
8: 2 A 2 Z 700
9: 3 B 2 Z 100
10: 4 B 2 V 200
11: 5 C 2 V 500
12: 6 C 2 W 100
13: 1 A 3 Z 400
14: 6 C 3 V 900
Then it's just casting to wide:
然后它只是投射到宽:
dcast(mDT, Type ~ Problem, fun.agg = length)
Type V W X Y Z
1: A 0 0 2 1 2
2: B 1 1 0 1 1
3: C 2 1 1 0 1
If you want the columns in a particular order or want to include unobserved levels of Problem
, you can use a factor (as the OP did):
如果您希望列按特定顺序或希望包含未观察到的问题级别,则可以使用因子(如OP所做的那样):
dcast(mDT, Type ~ factor(Problem, levels=c("X","Y","Z","W","V")), fun.agg = length)
# or more permanently
mDT[, Problem := factor(Problem, levels=c("X","Y","Z","W","V"))]
dcast(mDT, Type ~ Problem, fun.agg = length)
#2
1
You can try a dplyr
solution:
您可以尝试使用dplyr解决方案:
library(tidyverse)
dt %>%
select(Type, starts_with("Problem")) %>%
gather(key, value, -Type) %>%
group_by(Type) %>%
filter(!value=="") %>%
count(value) %>%
spread(value, n, fill = 0)
# A tibble: 3 x 6
# Groups: Type [3]
Type V W X Y Z
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 0 0 2 1 2
2 B 1 1 0 1 1
3 C 2 1 1 0 1
#3
1
# data
dt <- fread("
ID Type Problem1 Value1 Problem2 Value2 Problem3 Value3
1 A X 500 Y 1000 Z 400
2 A X 600 Z 700
3 B Y 700 Z 100
4 B W 200 V 200
5 C Z 500 V 500
6 C X 1000 W 100 V 900", fill = T)
dt <- dt[,c("Type", "Problem1", "Problem2", "Problem3")]
dt <- melt(dt, id = "Type")
cnt <- dt[,.(freq = .N), by = .(Type, value)]
mat <- reshape(cnt, idvar = "Type", timevar = "value", direction = "wide")
> mat
Type freq.X freq.Y freq.W freq.Z freq.V freq.
1: A 2 1 NA 2 NA 1
2: B NA 1 1 1 1 2
3: C 1 NA 1 1 2 1