如何计算特定的row_number

时间:2022-08-04 15:30:47

I have data:-

我有数据: -

name  row
a      1
a      2 
a      3
a      4
a      5
b      1
b      2
b      3
b      4
b      5
b      6
b      7
b      8
b      9
b      10
b      11
b      12
b      13
b      14
b      15
.......

It's grouped by name.

它按名称分组。

Row is column with row_number of grouped name. I need calculate new column with value that If the value in the row column is greater than 11, start counting again.

Row是具有分组名称的row_number的列。我需要计算新列的值,如果行列中的值大于11,则再次开始计数。

Should look like this:-

应该是这样的: -

name  row   new_row
a      1       1 
a      2       2
a      3       3
a      4       4
a      5       5
b      1       1
b      2       2
b      3       3
b      4       4
b      5       5
b      6       6
b      7       7
b      8       8
b      9       9
b      10       10
b      11       11
b      12       1
b      13       2
b      14       3
b      15       4
 .............

4 个解决方案

#1


3  

Try this, using dplyr:

试试这个,使用dplyr:

df <- data.frame(name=c(rep("a", 5), rep("b", 13)), stringsAsFactors = FALSE)
library(dplyr)
df %>%
  group_by(name) %>%
  mutate(
    row = row_number(),
    new_row = (row - 1L) %% 11L + 1L
  ) %>%
  ungroup()
# # A tibble: 18 × 3
#     name   row new_row
#    <chr> <int>   <int>
# 1      a     1       1
# 2      a     2       2
# 3      a     3       3
# 4      a     4       4
# 5      a     5       5
# 6      b     1       1
# 7      b     2       2
# 8      b     3       3
# 9      b     4       4
# 10     b     5       5
# 11     b     6       6
# 12     b     7       7
# 13     b     8       8
# 14     b     9       9
# 15     b    10      10
# 16     b    11      11
# 17     b    12       1
# 18     b    13       2

Using base-R:

do.call(
  rbind.data.frame,
  by(df, df$name,
     function(x) within(x, {
       row = seq_len(nrow(x))
       new_row = (row - 1L) %% 11L + 1L
     })
  ))
#      name new_row row
# a.1     a       1   1
# a.2     a       2   2
# a.3     a       3   3
# a.4     a       4   4
# a.5     a       5   5
# b.6     b       1   1
# b.7     b       2   2
# b.8     b       3   3
# b.9     b       4   4
# b.10    b       5   5
# b.11    b       6   6
# b.12    b       7   7
# b.13    b       8   8
# b.14    b       9   9
# b.15    b      10  10
# b.16    b      11  11
# b.17    b       1  12
# b.18    b       2  13

#2


2  

Another idea using ave from base R,

使用基础R的ave的另一个想法,

with(df, ave(row, name, FUN = function(i) replace(i, i>11, seq(i[i > 11]))))
#[1]  1  2  3  4  5  1  2  3  4  5  6  7  8  9 10 11  1  2  3  4

Good catch on the limitation and suggestion by @r2evans,

很好地了解了@ r2evans的限制和建议,

with(df, ave(row, name, FUN = function(i) (i-1)%%11+1))

#3


0  

Using the base R functions ave and rep, we can do

使用基本R函数ave和rep,我们可以做到

ave(df$row, df$name, FUN=function(x) rep(1:11, length.out=length(x)))
 [1]  1  2  3  4  5  1  2  3  4  5  6  7  8  9 10 11  1  2  3  4

This uses with reps length.out argument to repeat the sequence (1:11) truncating or repeating according to the length of each group.

这与reps length.out参数一起使用,重复序列(1:11)根据每个组的长度截断或重复。

#4


-1  

Reading part of the input data

读取部分输入数据

df <- read.table(text = "name  row
a      1
a      2 
a      3
a      4
a      5
b      1
b      2
b      3
b      4
b      5
b      6
b      7
b      8
b      9
b      10
b      11
b      12
b      13
b      14
b      15", header = TRUE)

Required output can be achieved using conditional mutate from dplyr package.

使用dplyr包中的条件mutate可以实现所需的输出。

df %>% mutate(newRow = ifelse(row > 11, row - 11, row))

#    name row newRow
# 1     a   1      1
# 2     a   2      2
# 3     a   3      3
# 4     a   4      4
# 5     a   5      5
# 6     b   1      1
# 7     b   2      2
# 8     b   3      3
# 9     b   4      4
# 10    b   5      5
# 11    b   6      6
# 12    b   7      7
# 13    b   8      8
# 14    b   9      9
# 15    b  10     10
# 16    b  11     11
# 17    b  12      1
# 18    b  13      2
# 19    b  14      3
# 20    b  15      4

If the row has number more than 22, then the complex statement ((row-1) %% 11 ) + 1 should be used

如果行的数量大于22,则应使用复杂语句((row-1)%% 11)+ 1

df <- data.frame(name=c(rep("a", 5), rep("b", 23)), row=c(1:5,1:23))

df %>% mutate(newRow = ifelse(row > 11, ((row-1) %% 11 ) + 1, row))
#    name row newRow
# 1     a   1      1
# 2     a   2      2
# 3     a   3      3
# 4     a   4      4
# 5     a   5      5
# 6     b   1      1
# 7     b   2      2
# 8     b   3      3
# 9     b   4      4
# 10    b   5      5
# 11    b   6      6
# 12    b   7      7
# 13    b   8      8
# 14    b   9      9
# 15    b  10     10
# 16    b  11     11
# 17    b  12      1
# 18    b  13      2
# 19    b  14      3
# 20    b  15      4
# 21    b  16      5
# 22    b  17      6
# 23    b  18      7
# 24    b  19      8
# 25    b  20      9
# 26    b  21     10
# 27    b  22     11
# 28    b  23      1

The same output using data.table,

使用data.table的相同输出,

dt <- data.table(df)
dt[, newRow := ifelse(row > 11, ((row-1) %% 11 ) + 1, row)]

#1


3  

Try this, using dplyr:

试试这个,使用dplyr:

df <- data.frame(name=c(rep("a", 5), rep("b", 13)), stringsAsFactors = FALSE)
library(dplyr)
df %>%
  group_by(name) %>%
  mutate(
    row = row_number(),
    new_row = (row - 1L) %% 11L + 1L
  ) %>%
  ungroup()
# # A tibble: 18 × 3
#     name   row new_row
#    <chr> <int>   <int>
# 1      a     1       1
# 2      a     2       2
# 3      a     3       3
# 4      a     4       4
# 5      a     5       5
# 6      b     1       1
# 7      b     2       2
# 8      b     3       3
# 9      b     4       4
# 10     b     5       5
# 11     b     6       6
# 12     b     7       7
# 13     b     8       8
# 14     b     9       9
# 15     b    10      10
# 16     b    11      11
# 17     b    12       1
# 18     b    13       2

Using base-R:

do.call(
  rbind.data.frame,
  by(df, df$name,
     function(x) within(x, {
       row = seq_len(nrow(x))
       new_row = (row - 1L) %% 11L + 1L
     })
  ))
#      name new_row row
# a.1     a       1   1
# a.2     a       2   2
# a.3     a       3   3
# a.4     a       4   4
# a.5     a       5   5
# b.6     b       1   1
# b.7     b       2   2
# b.8     b       3   3
# b.9     b       4   4
# b.10    b       5   5
# b.11    b       6   6
# b.12    b       7   7
# b.13    b       8   8
# b.14    b       9   9
# b.15    b      10  10
# b.16    b      11  11
# b.17    b       1  12
# b.18    b       2  13

#2


2  

Another idea using ave from base R,

使用基础R的ave的另一个想法,

with(df, ave(row, name, FUN = function(i) replace(i, i>11, seq(i[i > 11]))))
#[1]  1  2  3  4  5  1  2  3  4  5  6  7  8  9 10 11  1  2  3  4

Good catch on the limitation and suggestion by @r2evans,

很好地了解了@ r2evans的限制和建议,

with(df, ave(row, name, FUN = function(i) (i-1)%%11+1))

#3


0  

Using the base R functions ave and rep, we can do

使用基本R函数ave和rep,我们可以做到

ave(df$row, df$name, FUN=function(x) rep(1:11, length.out=length(x)))
 [1]  1  2  3  4  5  1  2  3  4  5  6  7  8  9 10 11  1  2  3  4

This uses with reps length.out argument to repeat the sequence (1:11) truncating or repeating according to the length of each group.

这与reps length.out参数一起使用,重复序列(1:11)根据每个组的长度截断或重复。

#4


-1  

Reading part of the input data

读取部分输入数据

df <- read.table(text = "name  row
a      1
a      2 
a      3
a      4
a      5
b      1
b      2
b      3
b      4
b      5
b      6
b      7
b      8
b      9
b      10
b      11
b      12
b      13
b      14
b      15", header = TRUE)

Required output can be achieved using conditional mutate from dplyr package.

使用dplyr包中的条件mutate可以实现所需的输出。

df %>% mutate(newRow = ifelse(row > 11, row - 11, row))

#    name row newRow
# 1     a   1      1
# 2     a   2      2
# 3     a   3      3
# 4     a   4      4
# 5     a   5      5
# 6     b   1      1
# 7     b   2      2
# 8     b   3      3
# 9     b   4      4
# 10    b   5      5
# 11    b   6      6
# 12    b   7      7
# 13    b   8      8
# 14    b   9      9
# 15    b  10     10
# 16    b  11     11
# 17    b  12      1
# 18    b  13      2
# 19    b  14      3
# 20    b  15      4

If the row has number more than 22, then the complex statement ((row-1) %% 11 ) + 1 should be used

如果行的数量大于22,则应使用复杂语句((row-1)%% 11)+ 1

df <- data.frame(name=c(rep("a", 5), rep("b", 23)), row=c(1:5,1:23))

df %>% mutate(newRow = ifelse(row > 11, ((row-1) %% 11 ) + 1, row))
#    name row newRow
# 1     a   1      1
# 2     a   2      2
# 3     a   3      3
# 4     a   4      4
# 5     a   5      5
# 6     b   1      1
# 7     b   2      2
# 8     b   3      3
# 9     b   4      4
# 10    b   5      5
# 11    b   6      6
# 12    b   7      7
# 13    b   8      8
# 14    b   9      9
# 15    b  10     10
# 16    b  11     11
# 17    b  12      1
# 18    b  13      2
# 19    b  14      3
# 20    b  15      4
# 21    b  16      5
# 22    b  17      6
# 23    b  18      7
# 24    b  19      8
# 25    b  20      9
# 26    b  21     10
# 27    b  22     11
# 28    b  23      1

The same output using data.table,

使用data.table的相同输出,

dt <- data.table(df)
dt[, newRow := ifelse(row > 11, ((row-1) %% 11 ) + 1, row)]