I have the following sample data of 5 cases with three repeated measures for two dependent variables "Rapport" and "STRS":
我有以下5个案例的样本数据,其中三个重复测量两个因变量“Rapport”和“STRS”:
df1<-structure(list(SubID = structure(1:5, .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81",
"82", "83", "84"), class = "factor"), Gender = structure(c(3L,
2L, 3L, 2L, 3L), .Label = c("#NULL!", "1", "2"), class = "factor"),
Age = structure(c(5L, 3L, 2L, 2L, 3L), .Label = c("#NULL!",
"10", "11", "8", "9"), class = "factor"), Rapport.1 = structure(c(22L,
25L, 19L, 10L, 18L), .Label = c("#NULL!", "1.1", "1.85",
"2.45", "2.5", "2.55", "2.6", "2.75", "2.8", "2.85", "2.9",
"2.95", "3.2", "3.25", "3.3", "3.35", "3.4", "3.45", "3.5",
"3.55", "3.6", "3.65", "3.7", "3.75", "3.8", "3.85", "3.9",
"3.95"), class = "factor"), Rapport.2 = structure(c(29L,
31L, 27L, 17L, 9L), .Label = c("#NULL!", "1.25", "1.4", "1.6",
"1.95", "2.05", "2.3", "2.35", "2.45", "2.5", "2.65", "2.7",
"2.75", "2.8", "2.85", "3", "3.05", "3.1", "3.15", "3.2",
"3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.65", "3.7",
"3.75", "3.8", "3.85", "3.9", "3.95", "4"), class = "factor"),
Rapport.3 = structure(c(32L, 35L, 22L, 22L, 5L), .Label = c("#NULL!",
"1.35", "1.45", "1.6", "1.75", "1.85", "1.9", "1.95", "2.05",
"2.1", "2.25", "2.3", "2.35", "2.4", "2.45", "2.6", "2.75",
"2.8", "2.9", "2.95", "3", "3.05", "3.1", "3.2", "3.25",
"3.3", "3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.7",
"3.75", "3.8", "3.85"), class = "factor"), STRS.1 = structure(c(33L,
10L, 8L, 18L, 29L), .Label = c("#NULL!", "100", "102", "103",
"104", "106", "107", "108", "109", "110", "111", "112", "113",
"114", "115", "116", "117", "118", "119", "120", "122", "123",
"124", "125", "126", "127", "128", "129", "132", "133", "69",
"71", "73", "85", "88", "89", "92", "97", "99"), class = "factor"),
STRS.2 = structure(c(37L, 19L, 9L, 22L, 21L), .Label = c("#NULL!",
"100", "101", "103", "104", "105", "106", "107", "108", "110",
"111", "113", "114", "115", "116", "117", "118", "119", "120",
"121", "122", "123", "124", "125", "126", "127", "128", "129",
"131", "132", "136", "137", "138", "139", "158", "63", "76",
"80", "91", "94", "95", "98", "99"), class = "factor"), STRS.3 = structure(c(31L,
11L, 19L, 23L, 22L), .Label = c("#NULL!", "102", "104", "105",
"106", "107", "108", "109", "110", "111", "112", "114", "117",
"118", "119", "120", "122", "123", "124", "125", "126", "127",
"128", "129", "130", "131", "132", "133", "134", "135", "66",
"70", "75", "81", "85", "87", "88", "94", "98"), class = "factor")), .Names = c("SubID",
"Gender", "Age", "Rapport.1", "Rapport.2", "Rapport.3", "STRS.1",
"STRS.2", "STRS.3"), row.names = c(NA, 5L), class = "data.frame")
To convert the dataset from wide to long form I ran the following code:
要将数据集从宽格式转换为长格式,我运行以下代码:
df2<-reshape(df1, varying = 4:9, sep = ".", direction = 'long')
View(df2)
The result is what I wanted but I don’t want the additional “id” column added in column 8 since it’s just a duplicate of my “SubID” in column 2. I’d also like to know if I need to make sure that I have a separator in my repeater measure variable headings in order for this function to work (e.g., “Rapport.1, Rapport.2, etc.”).
结果是我想要的,但我不希望在第8列中添加额外的“id”列,因为它只是第2列中我的“SubID”的副本。我还想知道我是否需要确保我的转发器测量变量标题中有一个分隔符,以便此功能起作用(例如,“Rapport.1,Rapport.2等”)。
I also want to know how to produce my desired results with the “melt” function in the reshape pkg. and the “gather” function in the tidyr pkg. When I try “melt” below, I get an error and it won’t run:
我还想知道如何使用重塑pkg中的“熔化”功能来产生我想要的结果。以及tidyr pkg中的“聚集”功能。当我在下面尝试“融化”时,我收到一个错误,它将无法运行:
df3<-melt(df1, id.vars=c("SubID","Gender","Age"),
measure.vars=c("Rapport.1","Rapport.2","Rapport.3","STRS.1","STRS.2","STRS.3,
variable.name=c("Rapport","STRS"),
value.name=("Rapport","STRS"))
When I try “gather” below, I get one column that contains the variable names “Rapport.1…” and “STRS.1…” stacked and another column with the values:
当我在下面尝试“收集”时,我得到一个包含变量名称“Rapport.1 ...”和“STRS.1 ...”的列,以及另一个包含值的列:
df4<-gather(df1, Rapport, STRS, Rapport.1:STRS.3)
View(df4)
I know there are benefits to using both of these packages so I’d like to understand how to produce my desired results with the “gather” and “melt” functions. Can anybody assist?
我知道使用这两个软件包都有好处,所以我想了解如何使用“聚集”和“融合”功能生成我想要的结果。有人可以帮忙吗?
2 个解决方案
#1
Here's the tidyr
version. You need to first gather
the columns, then separate
out the time columns and finally spread
the data.
这是tidyr版本。您需要先收集列,然后分离时间列,最后传播数据。
require(tidyr)
df1 %>%
gather(key, value, -c(SubID, Gender, Age)) %>%
separate(key, c("key", "time")) %>%
spread(key, value, convert = TRUE)
## SubID Gender Age time Rapport STRS
## 1 1 2 9 1 3.65 73
## 2 1 2 9 2 3.75 76
## 3 1 2 9 3 3.60 66
## 4 2 1 11 1 3.80 110
## 5 2 1 11 2 3.85 120
## 6 2 1 11 3 3.80 112
## 7 3 2 10 1 3.50 108
## 8 3 2 10 2 3.65 108
## 9 3 2 10 3 3.05 124
## 10 4 1 10 1 2.85 118
## 11 4 1 10 2 3.05 123
## 12 4 1 10 3 3.05 128
## 13 5 2 11 1 3.45 132
## 14 5 2 11 2 2.45 122
## 15 5 2 11 3 1.75 127
#2
Here's a simple solution using the devel data.table
version
这是使用devel data.table版本的简单解决方案
library(data.table) # v >= 1.9.5
melt(setDT(df1), measure = list(4:6, 7:9))
# SubID Gender Age variable value1 value2
# 1: 1 2 9 1 3.65 73
# 2: 2 1 11 1 3.8 110
# 3: 3 2 10 1 3.5 108
# 4: 4 1 10 1 2.85 118
# 5: 5 2 11 1 3.45 132
# 6: 1 2 9 2 3.75 76
# 7: 2 1 11 2 3.85 120
# 8: 3 2 10 2 3.65 108
# 9: 4 1 10 2 3.05 123
# 10: 5 2 11 2 2.45 122
# 11: 1 2 9 3 3.6 66
# 12: 2 1 11 3 3.8 112
# 13: 3 2 10 3 3.05 124
# 14: 4 1 10 3 3.05 128
# 15: 5 2 11 3 1.75 127
#1
Here's the tidyr
version. You need to first gather
the columns, then separate
out the time columns and finally spread
the data.
这是tidyr版本。您需要先收集列,然后分离时间列,最后传播数据。
require(tidyr)
df1 %>%
gather(key, value, -c(SubID, Gender, Age)) %>%
separate(key, c("key", "time")) %>%
spread(key, value, convert = TRUE)
## SubID Gender Age time Rapport STRS
## 1 1 2 9 1 3.65 73
## 2 1 2 9 2 3.75 76
## 3 1 2 9 3 3.60 66
## 4 2 1 11 1 3.80 110
## 5 2 1 11 2 3.85 120
## 6 2 1 11 3 3.80 112
## 7 3 2 10 1 3.50 108
## 8 3 2 10 2 3.65 108
## 9 3 2 10 3 3.05 124
## 10 4 1 10 1 2.85 118
## 11 4 1 10 2 3.05 123
## 12 4 1 10 3 3.05 128
## 13 5 2 11 1 3.45 132
## 14 5 2 11 2 2.45 122
## 15 5 2 11 3 1.75 127
#2
Here's a simple solution using the devel data.table
version
这是使用devel data.table版本的简单解决方案
library(data.table) # v >= 1.9.5
melt(setDT(df1), measure = list(4:6, 7:9))
# SubID Gender Age variable value1 value2
# 1: 1 2 9 1 3.65 73
# 2: 2 1 11 1 3.8 110
# 3: 3 2 10 1 3.5 108
# 4: 4 1 10 1 2.85 118
# 5: 5 2 11 1 3.45 132
# 6: 1 2 9 2 3.75 76
# 7: 2 1 11 2 3.85 120
# 8: 3 2 10 2 3.65 108
# 9: 4 1 10 2 3.05 123
# 10: 5 2 11 2 2.45 122
# 11: 1 2 9 3 3.6 66
# 12: 2 1 11 3 3.8 112
# 13: 3 2 10 3 3.05 124
# 14: 4 1 10 3 3.05 128
# 15: 5 2 11 3 1.75 127