Using dput, I provide the data.
使用dput,我提供数据。
# Example data: 28 sales records, 14 per product. Both products follow the
# same day pattern (2, 3, 7 and 2 records on four consecutive days).
# `Goods` and `date` are factors; `date` holds "dd.mm.yyyy" text, not Date
# values. `Y` is the integer amount that the snippets below sum up.
df <- local({
  goods_levels <- c("IceScream", "Kex")
  day_levels <- c("12.01.2015", "13.01.2015", "14.01.2015", "15.01.2015")
  rows_per_day <- c(2L, 3L, 7L, 2L)

  data.frame(
    Goods = factor(rep(goods_levels, each = 14L), levels = goods_levels),
    date = factor(
      rep(rep(day_levels, times = rows_per_day), times = 2L),
      levels = day_levels
    ),
    Y = c(
      # IceScream
      200L, 50L, 100L, 50L, 200L, 200L, 50L,
      200L, 100L, 1000L, 1000L, 50L, 50L, 100L,
      # Kex
      200L, 50L, 23L, 50L, 200L, 200L, 45L,
      200L, 100L, 6L, 23L, 50L, 50L, 436L
    )
  )
})
I want to aggregate the data by month.
我想要按月数据汇总。
library(dplyr)
library(lubridate)
library(zoo)
# Monthly total of Y across all goods.
# The value column in df is `Y`; the earlier `sum(new)` referred to a column
# that does not exist (df has only Goods, date and Y).
# NOTE: this assignment replaces df with the summary table.
df <- df %>%
  group_by(yearMon = as.yearmon(dmy(date))) %>%
  summarise(new = sum(Y))
But I have groups of goods (IceScream and Kex). How can I get the aggregation by month for each group?
但我有一组货物(icescream和kex)。如何按月分组聚集?
Something like this
像这样的东西
Goods date Y
IceScream jan-2015 3350
Kex jan-2015 1633
4 个解决方案
#1
1
Maybe this, using base functions rather than other packages (except dplyr):
也许这使用基本函数而不是其他包(除了dplyr):
# Total Y per product and month.
# `date` holds "dd.mm.yyyy" text; it is parsed with base as.Date() and
# rendered as "<abbreviated month>-<year>". `%b` follows the session locale,
# so the month text varies between machines.
df %>%
  mutate(yearMon = format(as.Date(date, format = "%d.%m.%Y"), "%b-%Y")) %>%
  group_by(Goods, yearMon) %>%
  summarise(new = sum(Y))
# A tibble: 2 x 3
# Groups: Goods [?]
Goods yearMon new
<fct> <chr> <int>
1 IceScream jan-2015 3350
2 Kex jan-2015 1633
EDIT
More basic:
编辑更基本:
# Base-R variant: add a month label column to df, then sum Y per
# Goods/month pair. `%b` is locale-dependent, so the label text varies
# between machines.
df[["yearMon"]] <- format(as.Date(df[["date"]], format = "%d.%m.%Y"), "%b-%Y")
aggregate(Y ~ Goods + yearMon, data = df, FUN = sum, na.rm = TRUE)
Goods yearMon Y
1 IceScream gen-2015 3350
2 Kex gen-2015 1633
EDIT 2:
To aggregate by week of the month (labelled 1-jan, 2-jan, etc.), you can try this:
编辑2:按周1和1等,你可以试试这个:
library(lubridate)
# Total Y per product and week of the month.
# The week index is ceiling(day of month / 7), so days 1-7 give 1, days 8-14
# give 2, and so on; it is pasted to the abbreviated month label.
df %>%
  mutate(
    parsed = as.Date(date, format = "%d.%m.%Y"),
    week = paste(
      ceiling(day(parsed) / 7),
      month(parsed, label = TRUE, abbr = TRUE),
      sep = "-"
    )
  ) %>%
  group_by(Goods, week) %>%
  summarise(new = sum(Y))
# A tibble: 4 x 3
# Groups: Goods [?]
Goods week new
<fct> <chr> <int>
1 IceScream 2-jan 3200
2 IceScream 3-jan 150
3 Kex 2-jan 1147
4 Kex 3-jan 486
#2
2
Ok, I see that you actually want to summarize by the amount of Y, my bad:
好吧,我看到你真的想用Y的数量总结,我的坏:
# Total Y per year-month and product, stored in df2 and then printed.
# yearMon is a zoo `yearmon` value built from the dd.mm.yyyy date text.
df2 <- df %>%
  mutate(yearMon = as.yearmon(dmy(date))) %>%
  group_by(yearMon, Goods) %>%
  summarise(new = sum(Y))
df2
# A tibble: 2 x 3
# Groups: yearMon [?]
yearMon Goods new
<S3: yearmon> <fct> <int>
1 Jan 2015 IceScream 3350
2 Jan 2015 Kex 1633
If you want to summarize by weeks use:
如果您想按周使用总结:
# Weekly total of Y per product; week() is applied to the parsed date.
# The stray trailing comma (an empty final argument) inside group_by() has
# been removed.
df2 <- df %>%
  group_by(Goods, week = week(dmy(date))) %>%
  summarise(new = sum(Y))
which gives:
> df2
# A tibble: 4 x 3
# Groups: Goods [?]
Goods week new
<fct> <dbl> <int>
1 IceScream 2 3200
2 IceScream 3 150
3 Kex 2 1147
4 Kex 3 486
>
#3
1
Using sqldf:
# Replace date with a "YYYY-MM" key, then let SQL do the grouping.
# The text is parsed straight to Date. Going through as.POSIXct() first gives
# local midnight, and as.Date() on a POSIXct converts in UTC by default, so in
# time zones east of UTC every date slips to the previous day, and the first
# day of a month lands in the previous month.
df$date <- format(as.Date(as.character(df$date), format = "%d.%m.%Y"), "%Y-%m")
library(sqldf)
# Sum Y for every month/product pair.
sqldf("select Goods,date,sum(Y) from df group by date,Goods")
Output:
Goods date sum(Y)
1 IceScream 2015-01 3350
2 Kex 2015-01 1633
#4
0
If you're not sure how you want to aggregate your data, first create separate columns for week, month and year; then it is easier to test different versions:
如果您不确定要如何聚合日期,请先创建包含周,月和年的单独列,然后更容易测试不同的版本:
library(dplyr)
library(lubridate)
# Add week, month and year columns once, so that different groupings can be
# tried without re-parsing the date each time.
df <- df %>%
  mutate(
    date = dmy(date),                   # dd.mm.yyyy text -> Date
    week = week(date),                  # uses the Date parsed on the line above
    month = month(date, label = TRUE),  # TRUE, not T: T is an ordinary variable
    year = year(date)
  )
df
# Goods date Y week month year
# 1 IceScream 2015-01-12 200 2 Jan 2015
# 2 IceScream 2015-01-12 50 2 Jan 2015
# 3 IceScream 2015-01-13 100 2 Jan 2015
# 4 IceScream 2015-01-13 50 2 Jan 2015
# 5 IceScream 2015-01-13 200 2 Jan 2015
# 6 IceScream 2015-01-14 200 2 Jan 2015
Grouped by year and month only:
仅按年份和月份分组:
# Total Y for each product within each year and month.
summarise(
  group_by(df, Goods, year, month),
  sum_y = sum(Y)
)
# A tibble: 2 x 4
# Groups: Goods, year [?]
# Goods year month sum_y
# <fct> <dbl> <ord> <int>
# 1 IceScream 2015 Jan 3350
# 2 Kex 2015 Jan 1633
Grouped by year, month and week:
按月和周分组:
# Total Y for each product within each year, month and week.
summarise(
  group_by(df, Goods, year, month, week),
  sum_y = sum(Y)
)
# A tibble: 4 x 5
# Groups: Goods, year, month [?]
# Goods year month week sum_y
# <fct> <dbl> <ord> <dbl> <int>
# 1 IceScream 2015 Jan 2 3200
# 2 IceScream 2015 Jan 3 150
# 3 Kex 2015 Jan 2 1147
# 4 Kex 2015 Jan 3 486
#1
1
Maybe this, using base functions rather than other packages (except dplyr):
也许这使用基本函数而不是其他包(除了dplyr):
# Total Y per product and month.
# `date` holds "dd.mm.yyyy" text; it is parsed with base as.Date() and
# rendered as "<abbreviated month>-<year>". `%b` follows the session locale,
# so the month text varies between machines.
df %>%
  mutate(yearMon = format(as.Date(date, format = "%d.%m.%Y"), "%b-%Y")) %>%
  group_by(Goods, yearMon) %>%
  summarise(new = sum(Y))
# A tibble: 2 x 3
# Groups: Goods [?]
Goods yearMon new
<fct> <chr> <int>
1 IceScream jan-2015 3350
2 Kex jan-2015 1633
EDIT
More basic:
编辑更基本:
# Base-R variant: add a month label column to df, then sum Y per
# Goods/month pair. `%b` is locale-dependent, so the label text varies
# between machines.
df[["yearMon"]] <- format(as.Date(df[["date"]], format = "%d.%m.%Y"), "%b-%Y")
aggregate(Y ~ Goods + yearMon, data = df, FUN = sum, na.rm = TRUE)
Goods yearMon Y
1 IceScream gen-2015 3350
2 Kex gen-2015 1633
EDIT 2:
To aggregate by week of the month (labelled 1-jan, 2-jan, etc.), you can try this:
编辑2:按周1和1等,你可以试试这个:
library(lubridate)
# Total Y per product and week of the month.
# The week index is ceiling(day of month / 7), so days 1-7 give 1, days 8-14
# give 2, and so on; it is pasted to the abbreviated month label.
df %>%
  mutate(
    parsed = as.Date(date, format = "%d.%m.%Y"),
    week = paste(
      ceiling(day(parsed) / 7),
      month(parsed, label = TRUE, abbr = TRUE),
      sep = "-"
    )
  ) %>%
  group_by(Goods, week) %>%
  summarise(new = sum(Y))
# A tibble: 4 x 3
# Groups: Goods [?]
Goods week new
<fct> <chr> <int>
1 IceScream 2-jan 3200
2 IceScream 3-jan 150
3 Kex 2-jan 1147
4 Kex 3-jan 486
#2
2
Ok, I see that you actually want to summarize by the amount of Y, my bad:
好吧,我看到你真的想用Y的数量总结,我的坏:
# Total Y per year-month and product, stored in df2 and then printed.
# yearMon is a zoo `yearmon` value built from the dd.mm.yyyy date text.
df2 <- df %>%
  mutate(yearMon = as.yearmon(dmy(date))) %>%
  group_by(yearMon, Goods) %>%
  summarise(new = sum(Y))
df2
# A tibble: 2 x 3
# Groups: yearMon [?]
yearMon Goods new
<S3: yearmon> <fct> <int>
1 Jan 2015 IceScream 3350
2 Jan 2015 Kex 1633
If you want to summarize by weeks use:
如果您想按周使用总结:
# Weekly total of Y per product; week() is applied to the parsed date.
# The stray trailing comma (an empty final argument) inside group_by() has
# been removed.
df2 <- df %>%
  group_by(Goods, week = week(dmy(date))) %>%
  summarise(new = sum(Y))
which gives:
> df2
# A tibble: 4 x 3
# Groups: Goods [?]
Goods week new
<fct> <dbl> <int>
1 IceScream 2 3200
2 IceScream 3 150
3 Kex 2 1147
4 Kex 3 486
>
#3
1
Using sqldf:
# Replace date with a "YYYY-MM" key, then let SQL do the grouping.
# The text is parsed straight to Date. Going through as.POSIXct() first gives
# local midnight, and as.Date() on a POSIXct converts in UTC by default, so in
# time zones east of UTC every date slips to the previous day, and the first
# day of a month lands in the previous month.
df$date <- format(as.Date(as.character(df$date), format = "%d.%m.%Y"), "%Y-%m")
library(sqldf)
# Sum Y for every month/product pair.
sqldf("select Goods,date,sum(Y) from df group by date,Goods")
Output:
Goods date sum(Y)
1 IceScream 2015-01 3350
2 Kex 2015-01 1633
#4
0
If you're not sure how you want to aggregate your data, first create separate columns for week, month and year; then it is easier to test different versions:
如果您不确定要如何聚合日期,请先创建包含周,月和年的单独列,然后更容易测试不同的版本:
library(dplyr)
library(lubridate)
# Add week, month and year columns once, so that different groupings can be
# tried without re-parsing the date each time.
df <- df %>%
  mutate(
    date = dmy(date),                   # dd.mm.yyyy text -> Date
    week = week(date),                  # uses the Date parsed on the line above
    month = month(date, label = TRUE),  # TRUE, not T: T is an ordinary variable
    year = year(date)
  )
df
# Goods date Y week month year
# 1 IceScream 2015-01-12 200 2 Jan 2015
# 2 IceScream 2015-01-12 50 2 Jan 2015
# 3 IceScream 2015-01-13 100 2 Jan 2015
# 4 IceScream 2015-01-13 50 2 Jan 2015
# 5 IceScream 2015-01-13 200 2 Jan 2015
# 6 IceScream 2015-01-14 200 2 Jan 2015
Grouped by year and month only:
仅按年份和月份分组:
# Total Y for each product within each year and month.
summarise(
  group_by(df, Goods, year, month),
  sum_y = sum(Y)
)
# A tibble: 2 x 4
# Groups: Goods, year [?]
# Goods year month sum_y
# <fct> <dbl> <ord> <int>
# 1 IceScream 2015 Jan 3350
# 2 Kex 2015 Jan 1633
Grouped by year, month and week:
按月和周分组:
# Total Y for each product within each year, month and week.
summarise(
  group_by(df, Goods, year, month, week),
  sum_y = sum(Y)
)
# A tibble: 4 x 5
# Groups: Goods, year, month [?]
# Goods year month week sum_y
# <fct> <dbl> <ord> <dbl> <int>
# 1 IceScream 2015 Jan 2 3200
# 2 IceScream 2015 Jan 3 150
# 3 Kex 2015 Jan 2 1147
# 4 Kex 2015 Jan 3 486