如何为R中的列中的元素分配顺序？

In the following dataset:

在以下数据集中：

Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A

How can I assign numbering to the variable Name in following order using R:

如何使用R按以下顺序为变量Name指定编号：

Day   Place   Name  Number
 22     X      A     1
 22     X      A     1
 22     X      B     2
 22     X      A     1
 22     Y      C     1
 22     Y      C     1
 22     Y      D     2
 23     X      B     1
 23     X      A     2

In a nutshell, I need to number the names according to their order of first occurrence on a given day and at a given place.

简而言之，我需要根据他们在某一天和某个地方发生的顺序对名称进行编号。

3 个解决方案

#1

In base R using tapply:

在基础R中使用tapply：

# Number each Name by order of first appearance within its (Day, Place) group.
# tapply() returns one result per group, ordered by the sorted group key, so a
# plain unlist() only lines up with the rows of dat when the groups are
# contiguous and already sorted. unsplit() puts every value back on the row it
# came from, whatever the row order is.
dat$Number <- with(dat, {
  grp <- paste(Day, Place)
  ranks <- tapply(
    as.character(Name), grp,
    FUN = function(y) as.integer(factor(y, levels = unique(y))),
    simplify = FALSE  # always a list, even when every group has one row
  )
  unsplit(ranks, grp)
})

#    Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     X    A      1
# 5  22     Y    C      1
# 6  22     Y    C      1
# 7  22     Y    D      2
# 8  23     X    B      1
# 9  23     X    A      2

idea

Group by Day and Place using tapply
按日期和地点分组使用tapply
For each group, coerce Name to a factor whose levels keep their order of first appearance.
对于每个组，将Name强制转换为因子，并使其水平保持首次出现的顺序。
Coerce the created factor to integer to get the final result.
将创建的因子强制转换为整数以获得最终结果。

using data.table(sugar syntax) :

library(data.table)
# Convert dat to a data.table and add the Number column: within each
# (Day, Place) group, Name becomes a factor whose levels follow order of first
# appearance, and the integer codes of that factor are the numbering.
setDT(dat)[
  ,
  Number := {
    nm <- as.character(Name)
    as.integer(factor(nm, levels = unique(nm)))
  },
  by = "Day,Place"
]

   Day Place Name Number
1:  22     X    A      1
2:  22     X    A      1
3:  22     X    B      2
4:  22     X    A      1
5:  22     Y    C      1
6:  22     Y    C      1
7:  22     Y    D      2
8:  23     X    B      1
9:  23     X    A      2

#2

# Number the elements of x by order of first appearance: the first distinct
# value gets 1, the next new value gets 2, and a value that reappears later
# keeps the number it was first given (A, A, B, A -> 1, 1, 2, 1).
# Comparing neighbours and taking cumsum() would instead start a new number at
# every change (1, 1, 2, 3 for the same input) and return length 1 for empty x.
idx <- function(x) match(x, unique(x))
# Return dat with a Number column: idx is applied to Name over the whole
# column, then applied again within each (Day, Place) group through ave(),
# which hands the per-group results back in the original row order.
transform(
  dat,
  Number = ave(idx(Name), Day, Place, FUN = idx)
)

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     Y    C      1
# 5  22     Y    C      1
# 6  22     Y    D      2
# 7  23     X    B      1
# 8  23     X    A      2

#3

Use ddply from plyr.

使用plyr的ddply。

# Example data parsed from inline text: the first line supplies the column
# names (Day, Place, Name) and each following line is one observation.
dfr <- read.table(
  text = "Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A",
  header = TRUE
)

library(plyr)
# For every (Day, Place) subset of dfr, add Number: the integer codes of Name
# taken as a factor whose levels follow order of first appearance in that
# subset. Arguments are spelled out by their formal names.
ddply(
  .data = dfr,
  .variables = .(Day, Place),
  .fun = mutate,
  Number = as.integer(factor(Name, levels = unique(Name)))
)

Or use dplyr, in a variant of beginneR's deleted answer.

或者在beginneR的删除答案的变体中使用dplyr。

library(dplyr)
# Within each (Day, Place) group, number Name by order of first appearance
# (integer codes of a factor whose levels follow that order). The grouping is
# dropped at the end so later steps on the result are not silently computed
# per group.
dfr %>%
  group_by(Day, Place) %>%
  mutate(Number = as.integer(factor(Name, levels = unique(Name)))) %>%
  ungroup()

#1

In base R using tapply:

在基础R中使用tapply：

# Number each Name by order of first appearance within its (Day, Place) group.
# tapply() returns one result per group, ordered by the sorted group key, so a
# plain unlist() only lines up with the rows of dat when the groups are
# contiguous and already sorted. unsplit() puts every value back on the row it
# came from, whatever the row order is.
dat$Number <- with(dat, {
  grp <- paste(Day, Place)
  ranks <- tapply(
    as.character(Name), grp,
    FUN = function(y) as.integer(factor(y, levels = unique(y))),
    simplify = FALSE  # always a list, even when every group has one row
  )
  unsplit(ranks, grp)
})

#    Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     X    A      1
# 5  22     Y    C      1
# 6  22     Y    C      1
# 7  22     Y    D      2
# 8  23     X    B      1
# 9  23     X    A      2

idea

Group by Day and Place using tapply
按日期和地点分组使用tapply
For each group, coerce Name to a factor whose levels keep their order of first appearance.
对于每个组，将Name强制转换为因子，并使其水平保持首次出现的顺序。
Coerce the created factor to integer to get the final result.
将创建的因子强制转换为整数以获得最终结果。

using data.table(sugar syntax) :

library(data.table)
# Convert dat to a data.table and add the Number column: within each
# (Day, Place) group, Name becomes a factor whose levels follow order of first
# appearance, and the integer codes of that factor are the numbering.
setDT(dat)[
  ,
  Number := {
    nm <- as.character(Name)
    as.integer(factor(nm, levels = unique(nm)))
  },
  by = "Day,Place"
]

   Day Place Name Number
1:  22     X    A      1
2:  22     X    A      1
3:  22     X    B      2
4:  22     X    A      1
5:  22     Y    C      1
6:  22     Y    C      1
7:  22     Y    D      2
8:  23     X    B      1
9:  23     X    A      2

#2

# Number the elements of x by order of first appearance: the first distinct
# value gets 1, the next new value gets 2, and a value that reappears later
# keeps the number it was first given (A, A, B, A -> 1, 1, 2, 1).
# Comparing neighbours and taking cumsum() would instead start a new number at
# every change (1, 1, 2, 3 for the same input) and return length 1 for empty x.
idx <- function(x) match(x, unique(x))
# Return dat with a Number column: idx is applied to Name over the whole
# column, then applied again within each (Day, Place) group through ave(),
# which hands the per-group results back in the original row order.
transform(
  dat,
  Number = ave(idx(Name), Day, Place, FUN = idx)
)

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     Y    C      1
# 5  22     Y    C      1
# 6  22     Y    D      2
# 7  23     X    B      1
# 8  23     X    A      2

#3

Use ddply from plyr.

使用plyr的ddply。

# Example data parsed from inline text: the first line supplies the column
# names (Day, Place, Name) and each following line is one observation.
dfr <- read.table(
  text = "Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A",
  header = TRUE
)

library(plyr)
# For every (Day, Place) subset of dfr, add Number: the integer codes of Name
# taken as a factor whose levels follow order of first appearance in that
# subset. Arguments are spelled out by their formal names.
ddply(
  .data = dfr,
  .variables = .(Day, Place),
  .fun = mutate,
  Number = as.integer(factor(Name, levels = unique(Name)))
)

Or use dplyr, in a variant of beginneR's deleted answer.

或者在beginneR的删除答案的变体中使用dplyr。

library(dplyr)
# Within each (Day, Place) group, number Name by order of first appearance
# (integer codes of a factor whose levels follow that order). The grouping is
# dropped at the end so later steps on the result are not silently computed
# per group.
dfr %>%
  group_by(Day, Place) %>%
  mutate(Number = as.integer(factor(Name, levels = unique(Name)))) %>%
  ungroup()

秒客网

如何为R中的列中的元素分配顺序？

3 个解决方案

#1

idea

using data.table(sugar syntax) :

#2

#3

#1

idea

using data.table(sugar syntax) :

#2

#3

相关文章