如何为R中的列中的元素分配顺序?

时间:2021-02-25 13:38:04

In the following dataset:

在以下数据集中:

Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A

How can I assign numbering to the variable Name in the following order using R:

如何使用R按以下顺序为变量Name指定编号:

Day   Place   Name  Number
 22     X      A     1
 22     X      A     1
 22     X      B     2
 22     X      A     1
 22     Y      C     1
 22     Y      C     1
 22     Y      D     2
 23     X      B     1
 23     X      A     2

In a nutshell, I need to number the names according to their order of first occurrence on a given day and at a given place.

简而言之,我需要根据名称在某一天、某个地点首次出现的顺序对其进行编号。

3 个解决方案

#1


3  

In base R using tapply:

在基础R中使用tapply:

# Number each Name by order of first appearance within its Day/Place group.
# ave() writes every group's result back to that group's original row
# positions, so Number stays aligned with the rows even when dat is not
# sorted by Day/Place or a group's rows are not contiguous.
# (unlist(tapply(...)) returns the values group by group in sorted-key
# order, which only lines up with dat when dat is already sorted that way.)
dat$Number <- ave(
  seq_along(dat$Name), dat$Day, dat$Place,
  FUN = function(rows) {
    y <- as.character(dat$Name[rows])
    # Levels follow first appearance, so the integer code is the rank.
    as.integer(factor(y, levels = unique(y)))
  }
)

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     X    A      1
# 5  22     Y    C      1
# 6  22     Y    C      1
# 7  22     Y    D      2
# 8  23     X    B      1
# 9  23     X    A      2

idea

  1. Group by Day and Place using tapply
  2. 按日期和地点分组使用tapply
  3. For each group, coerce Name to a factor whose levels keep their order of first appearance.
  4. 对于每个组,将Name强制转换为因子,其级别保持首次出现的顺序。
  5. Coerce the created factor to integer to get the final result.
  6. 将创建的因子强制转换为整数以获得最终结果。

using data.table(sugar syntax) :

library(data.table)
# Convert dat to a data.table in place, then add Number by reference.
# Within each Day/Place group the names become a factor whose levels
# follow order of first appearance; the integer codes are the numbering.
setDT(dat)
dat[, Number := {
  name_chr <- as.character(Name)
  as.integer(factor(name_chr, levels = unique(name_chr)))
}, by = .(Day, Place)]

   Day Place Name Number
1:  22     X    A      1
2:  22     X    A      1
3:  22     X    B      2
4:  22     X    A      1
5:  22     Y    C      1
6:  22     Y    C      1
7:  22     Y    D      2
8:  23     X    B      1
9:  23     X    A      2

#2


1  

# idx(x): for each element of x, the rank of its value by order of first
# appearance (first distinct value -> 1, second distinct value -> 2, ...).
# A value that reappears later gets its original number back, e.g.
# idx(c("A", "A", "B", "A")) is 1 1 2 1. Counting changes between
# neighbours instead would give 1 1 2 3 for that input and a length-1
# result for empty input.
idx <- function(x) match(x, unique(x))
# Apply idx to the whole Name column, then again within every Day/Place
# group via ave(), and attach the result as the Number column.
transform(dat, Number = {
  name_codes <- idx(Name)
  ave(name_codes, Day, Place, FUN = idx)
})

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     Y    C      1
# 5  22     Y    C      1
# 6  22     Y    D      2
# 7  23     X    B      1
# 8  23     X    A      2

#3


1  

Use ddply from plyr.

使用plyr的ddply。

# Build the example data frame (columns Day, Place, Name; nine rows) by
# parsing the whitespace-separated table below, whose first line is the header.
dfr <- local({
  sample_text <- "Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A"
  read.table(text = sample_text, header = TRUE)
})

library(plyr)
# Split dfr by Day and Place and, within each piece, add Number: the integer
# code of Name as a factor whose levels follow order of first appearance.
ddply(
  .data = dfr,
  .variables = c("Day", "Place"),
  .fun = mutate,
  Number = as.integer(factor(Name, levels = unique(Name)))
)

Or use dplyr, in a variant of beginneR's deleted answer.

或者使用dplyr,这是beginneR已删除答案的一个变体。

library(dplyr)
# Number each Name by order of first appearance within its Day/Place group.
# ungroup() drops the grouping afterwards, so later verbs applied to the
# result operate on the whole table rather than silently per group.
dfr %>%
  group_by(Day, Place) %>%
  mutate(Number = as.integer(factor(Name, levels = unique(Name)))) %>%
  ungroup()

#1


3  

In base R using tapply:

在基础R中使用tapply:

# Number each Name by order of first appearance within its Day/Place group.
# ave() writes every group's result back to that group's original row
# positions, so Number stays aligned with the rows even when dat is not
# sorted by Day/Place or a group's rows are not contiguous.
# (unlist(tapply(...)) returns the values group by group in sorted-key
# order, which only lines up with dat when dat is already sorted that way.)
dat$Number <- ave(
  seq_along(dat$Name), dat$Day, dat$Place,
  FUN = function(rows) {
    y <- as.character(dat$Name[rows])
    # Levels follow first appearance, so the integer code is the rank.
    as.integer(factor(y, levels = unique(y)))
  }
)

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     X    A      1
# 5  22     Y    C      1
# 6  22     Y    C      1
# 7  22     Y    D      2
# 8  23     X    B      1
# 9  23     X    A      2

idea

  1. Group by Day and Place using tapply
  2. 按日期和地点分组使用tapply
  3. For each group, coerce Name to a factor whose levels keep their order of first appearance.
  4. 对于每个组,将Name强制转换为因子,其级别保持首次出现的顺序。
  5. Coerce the created factor to integer to get the final result.
  6. 将创建的因子强制转换为整数以获得最终结果。

using data.table(sugar syntax) :

library(data.table)
# Convert dat to a data.table in place, then add Number by reference.
# Within each Day/Place group the names become a factor whose levels
# follow order of first appearance; the integer codes are the numbering.
setDT(dat)
dat[, Number := {
  name_chr <- as.character(Name)
  as.integer(factor(name_chr, levels = unique(name_chr)))
}, by = .(Day, Place)]

   Day Place Name Number
1:  22     X    A      1
2:  22     X    A      1
3:  22     X    B      2
4:  22     X    A      1
5:  22     Y    C      1
6:  22     Y    C      1
7:  22     Y    D      2
8:  23     X    B      1
9:  23     X    A      2

#2


1  

# idx(x): for each element of x, the rank of its value by order of first
# appearance (first distinct value -> 1, second distinct value -> 2, ...).
# A value that reappears later gets its original number back, e.g.
# idx(c("A", "A", "B", "A")) is 1 1 2 1. Counting changes between
# neighbours instead would give 1 1 2 3 for that input and a length-1
# result for empty input.
idx <- function(x) match(x, unique(x))
# Apply idx to the whole Name column, then again within every Day/Place
# group via ave(), and attach the result as the Number column.
transform(dat, Number = {
  name_codes <- idx(Name)
  ave(name_codes, Day, Place, FUN = idx)
})

#   Day Place Name Number
# 1  22     X    A      1
# 2  22     X    A      1
# 3  22     X    B      2
# 4  22     Y    C      1
# 5  22     Y    C      1
# 6  22     Y    D      2
# 7  23     X    B      1
# 8  23     X    A      2

#3


1  

Use ddply from plyr.

使用plyr的ddply。

# Build the example data frame (columns Day, Place, Name; nine rows) by
# parsing the whitespace-separated table below, whose first line is the header.
dfr <- local({
  sample_text <- "Day   Place   Name
 22     X      A
 22     X      A
 22     X      B
 22     X      A
 22     Y      C
 22     Y      C
 22     Y      D
 23     X      B
 23     X      A"
  read.table(text = sample_text, header = TRUE)
})

library(plyr)
# Split dfr by Day and Place and, within each piece, add Number: the integer
# code of Name as a factor whose levels follow order of first appearance.
ddply(
  .data = dfr,
  .variables = c("Day", "Place"),
  .fun = mutate,
  Number = as.integer(factor(Name, levels = unique(Name)))
)

Or use dplyr, in a variant of beginneR's deleted answer.

或者使用dplyr,这是beginneR已删除答案的一个变体。

library(dplyr)
# Number each Name by order of first appearance within its Day/Place group.
# ungroup() drops the grouping afterwards, so later verbs applied to the
# result operate on the whole table rather than silently per group.
dfr %>%
  group_by(Day, Place) %>%
  mutate(Number = as.integer(factor(Name, levels = unique(Name)))) %>%
  ungroup()