使用R语言按月分组

时间:2022-06-18 07:35:25

I am providing my data using dput.

使用dput,我提供数据。

# Sample data: 28 sales records, 14 per product, spread over four consecutive
# days. `Goods` and `date` are factors; `Y` is an integer amount.
df <- data.frame(
  Goods = factor(
    rep(c("IceScream", "Kex"), each = 14L),
    levels = c("IceScream", "Kex")
  ),
  date = factor(
    # Each product has 2, 3, 7 and 2 records on the four days, in that order.
    rep(
      rep(
        c("12.01.2015", "13.01.2015", "14.01.2015", "15.01.2015"),
        times = c(2L, 3L, 7L, 2L)
      ),
      times = 2L
    ),
    levels = c("12.01.2015", "13.01.2015", "14.01.2015", "15.01.2015")
  ),
  Y = c(
    # IceScream
    200L, 50L, 100L, 50L, 200L, 200L, 50L, 200L, 100L, 1000L,
    1000L, 50L, 50L, 100L,
    # Kex
    200L, 50L, 23L, 50L, 200L, 200L, 45L, 200L, 100L, 6L,
    23L, 50L, 50L, 436L
  )
)

I want to aggregate the data by month.

我想要按月数据汇总。

library(dplyr)
library(lubridate)
library(zoo)

# Total of Y per calendar month. The "dd.mm.yyyy" date strings are parsed with
# dmy() and collapsed to a year-month value with as.yearmon().
# The column to sum is `Y`; the data has no `new` column, so sum(new) fails.
# Note: this overwrites `df` with the summary table.
df <- df %>%
  group_by(yearMon = as.yearmon(dmy(date))) %>%
  summarise(new = sum(Y))

But I have two groups of goods (IceScream and Kex). How can I get the monthly aggregation for each group?

但我有一组货物(icescream和kex)。如何按月分组聚集?

Something like this

像这样的东西

Goods        date         Y
IceScream   jan-2015    3350
Kex         jan-2015    1633

4 个解决方案

#1


1  

Maybe this, using base functions rather than additional packages (except dplyr):

也许这使用基本函数而不是其他包(除了dplyr):

# Monthly total of Y for each product. The "dd.mm.yyyy" date strings are parsed
# with base as.Date() and formatted as "<abbreviated month>-<year>".
# "%b" follows the session locale, so the label can print as "jan-2015" in one
# locale and "gen-2015" in another.
df %>%
  mutate(yearMon = format(as.Date(date, "%d.%m.%Y"), "%b-%Y")) %>%
  group_by(Goods, yearMon) %>%
  summarise(new = sum(Y))
# A tibble: 2 x 3
# Groups:   Goods [?]
  Goods     yearMon    new
  <fct>     <chr>    <int>
1 IceScream jan-2015  3350
2 Kex       jan-2015  1633

EDIT
More basic:

编辑更基本:

# Base-R alternative: store the month label on the data frame, then sum Y for
# every Goods/yearMon combination. "%b" is locale-dependent.
df[["yearMon"]] <- format(as.Date(df$date, format = "%d.%m.%Y"), "%b-%Y")
aggregate(Y ~ Goods + yearMon, data = df, FUN = sum, na.rm = TRUE)

      Goods  yearMon    Y
1 IceScream gen-2015 3350
2       Kex gen-2015 1633

EDIT 2:
To group by week of the month, labelled as 1-jan, 2-jan, etc., you can try this:

编辑2:按周1和1等,你可以试试这个:

 library(lubridate)
# Weekly total of Y per product, where the week is counted within the month
# (day of month divided by 7, rounded up) and labelled "<week>-<month>".
df %>%
  mutate(
    parsed_date = as.Date(date, "%d.%m.%Y"),
    week = paste(
      ceiling(day(parsed_date) / 7),
      month(parsed_date, label = TRUE, abbr = TRUE),
      sep = "-"
    )
  ) %>%
  group_by(Goods, week) %>%
  summarise(new = sum(Y))

# A tibble: 4 x 3
# Groups:   Goods [?]
  Goods     week    new
  <fct>     <chr> <int>
1 IceScream 2-jan  3200
2 IceScream 3-jan   150
3 Kex       2-jan  1147
4 Kex       3-jan   486

#2


2  

Ok, I see that you actually want to summarize by the amount of Y, my bad:

好吧,我看到你真的想用Y的数量总结,我的坏:

# Monthly total of Y per product; yearMon is a "yearmon" value derived from the
# "dd.mm.yyyy" date strings.
df2 <- df %>%
  mutate(yearMon = as.yearmon(dmy(date))) %>%
  group_by(yearMon, Goods) %>%
  summarise(new = sum(Y))

# Show the result.
df2

# A tibble: 2 x 3
# Groups:   yearMon [?]
yearMon       Goods       new
<S3: yearmon> <fct>     <int>
1 Jan 2015      IceScream  3350
2 Jan 2015      Kex        1633

If you want to summarize by weeks use:

如果您想按周使用总结:

# Total of Y per product and week number, as computed by week() on the parsed
# date.
df2 <- df %>%
  group_by(Goods, week = week(dmy(date))) %>%
  summarise(new = sum(Y))

which gives:

> df2
# A tibble: 4 x 3
# Groups:   Goods [?]
Goods      week   new
<fct>     <dbl> <int>
1 IceScream     2  3200
2 IceScream     3   150
3 Kex           2  1147
4 Kex           3   486
> 

#3


1  

Using sqldf:

library(sqldf)

# Reduce each "dd.mm.yyyy" date to its "YYYY-MM" year-month label. Parsing
# straight to Date avoids a POSIXct round trip: as.Date() on a POSIXct value
# converts in UTC by default, so local midnight can land on the previous day
# (and, for the 1st, the previous month) in time zones ahead of UTC.
# Note: this overwrites df$date with the year-month string.
df$date <- format(as.Date(df$date, format = "%d.%m.%Y"), "%Y-%m")

# Sum Y for every month and product.
sqldf("select Goods,date,sum(Y) from df group by date,Goods")

Output:

   Goods    date      sum(Y)
1 IceScream 2015-01   3350
2       Kex 2015-01   1633

#4


0  

If you're not sure how you want to aggregate your dates, first create separate columns with week, month and year; then it is easier to test different versions:

如果您不确定要如何聚合日期,请先创建包含周,月和年的单独列,然后更容易测试不同的版本:

library(dplyr)
library(lubridate)

# Parse the "dd.mm.yyyy" strings once, then derive week, month and year as
# separate columns so that different groupings can be tried easily.
# Note: this overwrites `df`; `date` is replaced by the parsed value.
df <- df %>%
  mutate(
    date = dmy(date),
    week = week(date),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    month = month(date, label = TRUE),
    year = year(date)
  )

df
#    Goods       date    Y week month year
# 1  IceScream 2015-01-12  200    2   Jan 2015
# 2  IceScream 2015-01-12   50    2   Jan 2015
# 3  IceScream 2015-01-13  100    2   Jan 2015
# 4  IceScream 2015-01-13   50    2   Jan 2015
# 5  IceScream 2015-01-13  200    2   Jan 2015
# 6  IceScream 2015-01-14  200    2   Jan 2015

Grouped by year and month only:

仅按年份和月份分组:

# Total of Y per product for each year/month combination.
summarise(
  group_by(df, Goods, year, month),
  sum_y = sum(Y)
)

# A tibble: 2 x 4
# Groups:   Goods, year [?]
#   Goods      year month sum_y
#   <fct>     <dbl> <ord> <int>
# 1 IceScream  2015 Jan    3350
# 2 Kex        2015 Jan    1633

Grouped by year, month and week:

按月和周分组:

# Total of Y per product for each year/month/week combination.
summarise(
  group_by(df, Goods, year, month, week),
  sum_y = sum(Y)
)

# A tibble: 4 x 5
# Groups:   Goods, year, month [?]
#   Goods      year month  week sum_y
#   <fct>     <dbl> <ord> <dbl> <int>
# 1 IceScream  2015 Jan       2  3200
# 2 IceScream  2015 Jan       3   150
# 3 Kex        2015 Jan       2  1147
# 4 Kex        2015 Jan       3   486

#1


1  

Maybe this, using base functions rather than additional packages (except dplyr):

也许这使用基本函数而不是其他包(除了dplyr):

# Monthly total of Y for each product. The "dd.mm.yyyy" date strings are parsed
# with base as.Date() and formatted as "<abbreviated month>-<year>".
# "%b" follows the session locale, so the label can print as "jan-2015" in one
# locale and "gen-2015" in another.
df %>%
  mutate(yearMon = format(as.Date(date, "%d.%m.%Y"), "%b-%Y")) %>%
  group_by(Goods, yearMon) %>%
  summarise(new = sum(Y))
# A tibble: 2 x 3
# Groups:   Goods [?]
  Goods     yearMon    new
  <fct>     <chr>    <int>
1 IceScream jan-2015  3350
2 Kex       jan-2015  1633

EDIT
More basic:

编辑更基本:

# Base-R alternative: store the month label on the data frame, then sum Y for
# every Goods/yearMon combination. "%b" is locale-dependent.
df[["yearMon"]] <- format(as.Date(df$date, format = "%d.%m.%Y"), "%b-%Y")
aggregate(Y ~ Goods + yearMon, data = df, FUN = sum, na.rm = TRUE)

      Goods  yearMon    Y
1 IceScream gen-2015 3350
2       Kex gen-2015 1633

EDIT 2:
To group by week of the month, labelled as 1-jan, 2-jan, etc., you can try this:

编辑2:按周1和1等,你可以试试这个:

 library(lubridate)
# Weekly total of Y per product, where the week is counted within the month
# (day of month divided by 7, rounded up) and labelled "<week>-<month>".
df %>%
  mutate(
    parsed_date = as.Date(date, "%d.%m.%Y"),
    week = paste(
      ceiling(day(parsed_date) / 7),
      month(parsed_date, label = TRUE, abbr = TRUE),
      sep = "-"
    )
  ) %>%
  group_by(Goods, week) %>%
  summarise(new = sum(Y))

# A tibble: 4 x 3
# Groups:   Goods [?]
  Goods     week    new
  <fct>     <chr> <int>
1 IceScream 2-jan  3200
2 IceScream 3-jan   150
3 Kex       2-jan  1147
4 Kex       3-jan   486

#2


2  

Ok, I see that you actually want to summarize by the amount of Y, my bad:

好吧,我看到你真的想用Y的数量总结,我的坏:

# Monthly total of Y per product; yearMon is a "yearmon" value derived from the
# "dd.mm.yyyy" date strings.
df2 <- df %>%
  mutate(yearMon = as.yearmon(dmy(date))) %>%
  group_by(yearMon, Goods) %>%
  summarise(new = sum(Y))

# Show the result.
df2

# A tibble: 2 x 3
# Groups:   yearMon [?]
yearMon       Goods       new
<S3: yearmon> <fct>     <int>
1 Jan 2015      IceScream  3350
2 Jan 2015      Kex        1633

If you want to summarize by weeks use:

如果您想按周使用总结:

# Total of Y per product and week number, as computed by week() on the parsed
# date.
df2 <- df %>%
  group_by(Goods, week = week(dmy(date))) %>%
  summarise(new = sum(Y))

which gives:

> df2
# A tibble: 4 x 3
# Groups:   Goods [?]
Goods      week   new
<fct>     <dbl> <int>
1 IceScream     2  3200
2 IceScream     3   150
3 Kex           2  1147
4 Kex           3   486
> 

#3


1  

Using sqldf:

library(sqldf)

# Reduce each "dd.mm.yyyy" date to its "YYYY-MM" year-month label. Parsing
# straight to Date avoids a POSIXct round trip: as.Date() on a POSIXct value
# converts in UTC by default, so local midnight can land on the previous day
# (and, for the 1st, the previous month) in time zones ahead of UTC.
# Note: this overwrites df$date with the year-month string.
df$date <- format(as.Date(df$date, format = "%d.%m.%Y"), "%Y-%m")

# Sum Y for every month and product.
sqldf("select Goods,date,sum(Y) from df group by date,Goods")

Output:

   Goods    date      sum(Y)
1 IceScream 2015-01   3350
2       Kex 2015-01   1633

#4


0  

If you're not sure how you want to aggregate your dates, first create separate columns with week, month and year; then it is easier to test different versions:

如果您不确定要如何聚合日期,请先创建包含周,月和年的单独列,然后更容易测试不同的版本:

library(dplyr)
library(lubridate)

# Parse the "dd.mm.yyyy" strings once, then derive week, month and year as
# separate columns so that different groupings can be tried easily.
# Note: this overwrites `df`; `date` is replaced by the parsed value.
df <- df %>%
  mutate(
    date = dmy(date),
    week = week(date),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    month = month(date, label = TRUE),
    year = year(date)
  )

df
#    Goods       date    Y week month year
# 1  IceScream 2015-01-12  200    2   Jan 2015
# 2  IceScream 2015-01-12   50    2   Jan 2015
# 3  IceScream 2015-01-13  100    2   Jan 2015
# 4  IceScream 2015-01-13   50    2   Jan 2015
# 5  IceScream 2015-01-13  200    2   Jan 2015
# 6  IceScream 2015-01-14  200    2   Jan 2015

Grouped by year and month only:

仅按年份和月份分组:

# Total of Y per product for each year/month combination.
summarise(
  group_by(df, Goods, year, month),
  sum_y = sum(Y)
)

# A tibble: 2 x 4
# Groups:   Goods, year [?]
#   Goods      year month sum_y
#   <fct>     <dbl> <ord> <int>
# 1 IceScream  2015 Jan    3350
# 2 Kex        2015 Jan    1633

Grouped by year, month and week:

按月和周分组:

# Total of Y per product for each year/month/week combination.
summarise(
  group_by(df, Goods, year, month, week),
  sum_y = sum(Y)
)

# A tibble: 4 x 5
# Groups:   Goods, year, month [?]
#   Goods      year month  week sum_y
#   <fct>     <dbl> <ord> <dbl> <int>
# 1 IceScream  2015 Jan       2  3200
# 2 IceScream  2015 Jan       3   150
# 3 Kex        2015 Jan       2  1147
# 4 Kex        2015 Jan       3   486