从R的分组数据的每周数据框架到每月数据框架的汇总

时间:2021-05-12 16:13:50

My df contains weekly data for cities, each associated with a state, with the columns value_1 and value_2:

我的df包含各城市的每周数据,每个城市都对应一个州,数据列为value_1和value_2:

 # dput() of the sample data (40 weekly rows for one city). The original dump
 # ended with `.internal.selfref = <pointer: ...>`, which is not valid R syntax;
 # that attribute is omitted here so the expression can be parsed and evaluated.
 structure(list(city_id = c("FS030", "FS030", 
    "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", 
    "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", 
    "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", 
    "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", 
    "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", "FS030", 
    "FS030", "FS030", "FS030"), state_id = c(580L, 
    580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 
    580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 
    580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 580L, 
    580L, 580L, 580L, 580L, 580L, 580L), dt = structure(c(17650, 
    17685, 17517, 17622, 17566, 17475, 17734, 17559, 17636, 17657, 
    17461, 17713, 17608, 17503, 17510, 17454, 17727, 17692, 17664, 
    17587, 17524, 17538, 17419, 17440, 17594, 17447, 17468, 17678, 
    17552, 17629, 17426, 17643, 17720, 17706, 17601, 17489, 17580, 
    17671, 17545, 17699), class = "Date"), value_1 = c(0.411567769583333, 
    0.431277450416667, 0.35, 0.388759958333333, 0.425288925833333, 
    0.35, 0.510489409, 0.35, 0.413527006666667, 0.435785208333333, 
    0.35, 0.3437731696875, 0.370323125, 0.35, 0.35, 0.35, 0.3289119371875, 
    0.35, 0.385370253333333, 0.386275627916667, 0.35, 0.35, 0.35, 
    0.35, 0.38542251125, 0.35, 0.35, 0.433777874583333, 0.35, 0.37273111375, 
    0.35, 0.414055592916667, 0.36269250125, 0.3190907646875, 0.38305069625, 
    0.35, 0.36493013, 0.397780668333333, 0.35, 0.31741553375), value_2 = c(0.204795420930233, 
    0.171267078372093, 0.35, 0.255914227906977, 0.193582022093023, 
    0.35, 0.25652965627907, 0.35, 0.177623573488372, 0.196440658837209, 
    0.35, 0.19950949372093, 0.20368746627907, 0.35, 0.35, 0.35, 0.197593547674419, 
    0.194895088139535, 0.177148813255814, 0.168303015581395, 0.35, 
    0.35, 0.35, 0.35, 0.16433289372093, 0.35, 0.35, 0.169355578604651, 
    0.35, 0.174366632325581, 0.35, 0.183884318372093, 0.294483633953488, 
    0.193520130465116, 0.206268007209302, 0.35, 0.217896235813953, 
    0.177587891395349, 0.35, 0.207436642325581)), class = c("data.table", 
    "data.frame"), row.names = c(NA, -40L))

Would like to calculate the growth rate of each city using dplyr. Desired output :

想用dplyr计算每个城市的增长率。期望的输出:

state city value1 value2
1      x    0.32    .11

Note that the dates are not in order, so we need to sort by date first and then calculate the growth rate of each city using dplyr.

请注意,日期不是有序的,所以我们需要先按日期排序,然后使用dplyr计算每个城市的增长率

2 个解决方案

#1


4  

We could group by 'city_id', 'state_id' and rounded date of 'dt' and get the mean of 'value' columns

我们可以按'city_id','state_id'和'dt'的舍入日期进行分组,得到'value'列的平均值

library(data.table)
library(lubridate)
# For each (rounded quarter, city, state) group, take the mean of every
# column that data.table exposes through .SD.
dt1[, lapply(.SD, mean),
    by = list(three_month = round_date(dt, "quarter"), city_id, state_id)]

Or with dplyr

或者用dplyr

library(tidyverse)
# Mean of every `value*` column per (rounded quarter, city, state) group.
# across() replaces the superseded summarise_at(); `.groups = "drop_last"`
# keeps the same output grouping summarise_at() produced (last grouping
# level dropped) while making that choice explicit.
dt1 %>%
  group_by(three_month = round_date(dt, "quarter"), city_id, state_id) %>%
  summarise(across(starts_with("value"), mean), .groups = "drop_last")

#2


0  

Might work like this also

也可能这样工作

library(dplyr)

# Percentage change from the first to the last element of `x`.
# `x` must already be in chronological order; the result is Inf/NaN when the
# first element is 0.
growth1 <- function(x) {
  (last(x) - first(x)) / first(x) * 100
}

# Growth rate of every `value*` column per state/city.
# The weekly rows are not stored chronologically, so sort by `dt` first;
# otherwise first()/last() would pick arbitrary weeks. The grouping columns
# are named `state_id` / `city_id` in the sample data.
df <- df %>%
  arrange(dt) %>%
  group_by(state_id, city_id) %>%
  summarise(across(starts_with("value"), growth1), .groups = "drop_last")

#1


4  

We could group by 'city_id', 'state_id' and rounded date of 'dt' and get the mean of 'value' columns

我们可以按'city_id','state_id'和'dt'的舍入日期进行分组,得到'value'列的平均值

library(data.table)
library(lubridate)
# For each (rounded quarter, city, state) group, take the mean of every
# column that data.table exposes through .SD.
dt1[, lapply(.SD, mean),
    by = list(three_month = round_date(dt, "quarter"), city_id, state_id)]

Or with dplyr

或者用dplyr

library(tidyverse)
# Mean of every `value*` column per (rounded quarter, city, state) group.
# across() replaces the superseded summarise_at(); `.groups = "drop_last"`
# keeps the same output grouping summarise_at() produced (last grouping
# level dropped) while making that choice explicit.
dt1 %>%
  group_by(three_month = round_date(dt, "quarter"), city_id, state_id) %>%
  summarise(across(starts_with("value"), mean), .groups = "drop_last")

#2


0  

Might work like this also

也可能这样工作

library(dplyr)

# Percentage change from the first to the last element of `x`.
# `x` must already be in chronological order; the result is Inf/NaN when the
# first element is 0.
growth1 <- function(x) {
  (last(x) - first(x)) / first(x) * 100
}

# Growth rate of every `value*` column per state/city.
# The weekly rows are not stored chronologically, so sort by `dt` first;
# otherwise first()/last() would pick arbitrary weeks. The grouping columns
# are named `state_id` / `city_id` in the sample data.
df <- df %>%
  arrange(dt) %>%
  group_by(state_id, city_id) %>%
  summarise(across(starts_with("value"), growth1), .groups = "drop_last")