使用rlang列出purrr:::pmap中的变量

I am trying to write a function called grouped_lm, which basically runs linear regression models for each level of the combination of grouping variables (grouping.vars) for multiple criterion/dependent (crit.vars) and predictor/independent variables (pred.vars).

我正在尝试编写一个名为grouped_lm的函数，它基本上是为多个标准/依赖项(crit.vars)和预测/独立变量(pred.vars)的组合变量(group. vars)组合的每个级别运行线性回归模型。

This is done in such a way that first entry into crit.vars is regressed onto pred.vars. For example, if I enter grouping.vars = am, crit.vars = c(mpg, drat), and crit.vars = c(wt, disp) (in the context of mtcars dataset), the function will run two regression models (mpg ~ wt and drat ~ disp) for each level of grouping variable am (am = 0 and am = 1).

这是这样做的，第一个进入crit。vars退化到pred.vars。例如，如果我输入分组。var =,致命一击。vars = c(mpg, drat)和crit。vars = c(wt, disp)(在mtcars数据集的上下文中)，该函数将对每层分组变量am (am = 0, am = 1)运行两个回归模型(mpg ~ wt, drat ~ disp)。

I have managed to create a dataframe from entered variables, write a custom function that runs linear regression models, but can't seem to figure out how to use rlang to get entered variables into the list elements that will be entered into purrr::pmap.

我已经设法从输入的变量中创建了一个dataframe，编写了一个运行线性回归模型的自定义函数，但是似乎无法找到如何使用rlang将输入的变量输入到将要输入purrr::pmap的列表元素中。

Apologies for a lengthy question and thanks in advance for any help offered.

对一个冗长的问题表示歉意，并预先感谢提供的任何帮助。

# libraries needed
library(tidyverse)
library(plyr)

# function definition
grouped_lm <- function(data,
                       grouping.vars,
                       crit.vars,
                       pred.vars) {

  #================== preparing dataframe ==================
  #
  # check how many variables were entered for criterion variables vector
  crit.vars <-
    as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
  crit.vars <-
    if (length(crit.vars) == 1) {
      crit.vars
    } else {
      crit.vars[-1]
    }

  # check how many variables were entered for predictor variables vector
  pred.vars <-
    as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
  pred.vars <-
    if (length(pred.vars) == 1) {
      pred.vars
    } else {
      pred.vars[-1]
    }

  # check how many variables were entered for grouping variable vector
  grouping.vars <-
    as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
  grouping.vars <-
    if (length(grouping.vars) == 1) {
      grouping.vars
    } else {
      grouping.vars[-1]
    }

  # getting the dataframe ready
  df <- dplyr::select(.data = data,
                      !!!grouping.vars,
                      !!!crit.vars,
                      !!!pred.vars) %>%
    dplyr::group_by(.data = ., !!!grouping.vars) %>%
    tidyr::nest(data = .)

  # checking if the nested dataframe looks okay
  cat(paste("the entire nested dataframe: \n"))
  print(df)             # the entire nested dataframe
  cat(paste("first element of the list column from nested dataframe: \n"))
  print(df$data[[1]])   # first element of the list column

  #============== custom function ================

  # custom function to run linear regression for every element of a list for two variables
  lm_listed <- function(list.col, x_name, y_name) {
    fx <- glue::glue("scale({y_name}) ~ scale({x_name})")

    # this tags any names that are not predictor variables (used to remove intercept terms)
    filter_name  <- glue::glue("scale({x_name})")

    # dataframe with results from lm
    results_df <-
      list.col %>% # running linear regression on each individual group with purrr
      purrr::map(.x = .,
                 .f = ~ stats::lm(formula = as.formula(fx),
                                  data = (.))) %>% # tidying up the output with broom
      purrr::map_dfr(.x = .,
                     .f = ~ broom::tidy(x = .),
                     .id = "group") %>% # remove intercept terms
      dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
      dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
      dplyr::select(
        .data = .,
        group,
        formula,
        term,
        estimate,
        std.error,
        t = statistic,
        p.value
      ) %>% # convert to a tibble dataframe
      tibble::as_data_frame(x = .)

    # return the dataframe
    return(results_df)
  }

  # check if the function works
  group_mtcars <- split(mtcars, mtcars$am)
  fn_results <- purrr::pmap(.l = list(
    l = list(group_mtcars),
    x_name = list('wt', 'disp'),
    y_name = list('mpg', 'drat')
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  # seems to be working!
  cat(paste("the custom function seems to be working!: \n"))
  print(fn_results)

  #========= using  custom function on entered dataframe =================
  cat(paste("running the custom function on the entered dataframe: \n"))
  # running custom function for each element of the created list column
  df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = list(!!!pred.vars),
    y_name = list(!!!crit.vars)
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)
}

# example usage of the function
grouped_lm(
  data = iris,
  crit.vars = c(Sepal.Length, Petal.Length),
  pred.vars = c(Sepal.Width, Petal.Width),
  grouping.vars = Species
)
#> the entire nested dataframe: 
#> # A tibble: 3 x 2
#>   Species    data             
#>   <fct>      <list>           
#> 1 setosa     <tibble [50 x 4]>
#> 2 versicolor <tibble [50 x 4]>
#> 3 virginica  <tibble [50 x 4]>
#> first element of the list column from nested dataframe: 
#> # A tibble: 50 x 4
#>    Sepal.Length Petal.Length Sepal.Width Petal.Width
#>           <dbl>        <dbl>       <dbl>       <dbl>
#>  1         5.10         1.40        3.50       0.200
#>  2         4.90         1.40        3.00       0.200
#>  3         4.70         1.30        3.20       0.200
#>  4         4.60         1.50        3.10       0.200
#>  5         5.00         1.40        3.60       0.200
#>  6         5.40         1.70        3.90       0.400
#>  7         4.60         1.40        3.40       0.300
#>  8         5.00         1.50        3.40       0.200
#>  9         4.40         1.40        2.90       0.200
#> 10         4.90         1.50        3.10       0.100
#> # ... with 40 more rows
#> the custom function seems to be working!: 
#> # A tibble: 4 x 7
#>   group formula                   term    estimate std.error     t p.value
#>   <chr> <chr>                     <chr>      <dbl>     <dbl> <dbl>   <dbl>
#> 1 0     scale(mpg) ~ scale(wt)    scale(~   -0.768     0.155 -4.94 1.25e-4
#> 2 1     scale(mpg) ~ scale(wt)    scale(~   -0.909     0.126 -7.23 1.69e-5
#> 3 0     scale(drat) ~ scale(disp) scale(~   -0.614     0.192 -3.20 5.20e-3
#> 4 1     scale(drat) ~ scale(disp) scale(~   -0.305     0.287 -1.06 3.12e-1
#> running the custom function on the entered dataframe:
#> Error in !pred.vars: invalid argument type

Created on 2018-03-23 by the reprex package (v0.2.0).

由reprex包(v0.2.0)在2018-03-23创建。

Edits after the answer was provided

提供答案后进行编辑

I was also wondering how I can get separate columns in the ouput for every grouping variable. So, with the answer provided below, if I run-

我还想知道如何为每个分组变量在ouput中获得单独的列。所以，下面给出的答案，如果我跑-

grouped_lm(
  data = mtcars,
  crit.vars = c(wt, mpg),
  pred.vars = c(drat, disp),
  grouping.vars = c(am, cyl)
)

It works, but the output looks something like this:

它可以工作，但是输出是这样的:

As can be appreciated from the picture, it is not at all clear what do the values 1 to 6 represent. So it will be nice to also get a separate column for each grouping variable provided so that, in this example, there will be two columns am and cyl with their respective levels for each lm model.

从图中可以看出，1到6的值代表什么并不清楚。因此，为提供的每个分组变量也提供一个单独的列是很好的，因此，在本例中，每个lm模型将有两个列am和cyl，它们各自的级别。

(I have manually created this dataframe. This is not how the grouping is happening, but this is just to show what the desired output looks like.)

(我已经手工创建了这个dataframe。这不是分组发生的方式，但这只是为了显示所需的输出。

1 个解决方案

#1

If we need to replicate the same behavior as in the example usage of 'mtcars' where the x_name and y_name are strings rather than symbols (which is the case of 'pred.vars' and 'crit.vars'), convert those to strings with quo_name i.e.

如果我们需要复制与“mtcars”示例中相同的行为，其中x_name和y_name是字符串而不是符号(这是“pred”的情况)。vars'和'crit.vars')将它们转换为带有quo_name的字符串。

df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = map(pred.vars, quo_name),
    y_name = map(crit.vars, quo_name)
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)

}

Or pass as symbol without any evaluation i.e. with !!

或通过作为符号，没有任何评价，例如与!!

df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = pred.vars,  ###
    y_name = crit.vars   ###
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)

}

This has to do with how the lm_listed function is taking the arguments. Consider the objects as strings

这与列出的函数如何获取参数有关。把对象看作字符串

sl <- "Sepal.Length"
sw <- "Sepal.Width"

and glue returns it correctly

胶水正确返回

glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)

Now, we change this to symbol, it also works

现在，我们把它改成符号，它也可以

sl <- rlang::sym("Sepal.Length")
sw <- rlang::sym("Sepal.Width")
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)

But, the problem is in using !! for evaluating that is passed as input argument

但是，问题是在使用!作为输入参数传递的值

sl <- !!rlang::sym("Sepal.Length")

Error in !rlang::sym("Sepal.Length") : invalid argument type

rlang::sym(“Sepal.Length”):无效参数类型。

The !! is evaluated outside the environment of tidyverse functions, which result in the error

的! !是在tidyverse函数的环境之外求值的，这会导致错误吗

-full code

完整的代码

grouped_lm <- function(data,
                       grouping.vars,
                       crit.vars,
                       pred.vars) {

  #================== preparing dataframe ==================
  #
  # check how many variables were entered for criterion variables vector
  crit.vars <-
    as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
  crit.vars <-
    if (length(crit.vars) == 1) {
      crit.vars
    } else {
      crit.vars[-1]
    }

  # check how many variables were entered for predictor variables vector
  pred.vars <-
    as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
  pred.vars <-
    if (length(pred.vars) == 1) {
      pred.vars
    } else {
      pred.vars[-1]
    }

  # check how many variables were entered for grouping variable vector
  grouping.vars <-
    as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
  grouping.vars <-
    if (length(grouping.vars) == 1) {
      grouping.vars
    } else {
      grouping.vars[-1]
    }

  # getting the dataframe ready
  df <- dplyr::select(.data = data,
                      !!!grouping.vars,
                      !!!crit.vars,
                      !!!pred.vars) %>%
    dplyr::group_by(.data = ., !!!grouping.vars) %>%
    tidyr::nest(data = .)

  # checking if the nested dataframe looks okay
  cat(paste("the entire nested dataframe: \n"))
  print(df)             # the entire nested dataframe
  cat(paste("first element of the list column from nested dataframe: \n"))
  print(df$data[[1]])   # first element of the list column

  #============== custom function ================

  # custom function to run linear regression for every element of a list for two variables
  lm_listed <- function(list.col, x_name, y_name) {
    fx <- glue::glue("scale({y_name}) ~ scale({x_name})")

    # this tags any names that are not predictor variables (used to remove intercept terms)
    filter_name  <- glue::glue("scale({x_name})")

    # dataframe with results from lm
    results_df <-
      list.col %>% # running linear regression on each individual group with purrr
      purrr::map(.x = .,
                 .f = ~ stats::lm(formula = as.formula(fx),
                                  data = (.))) %>% # tidying up the output with broom
      purrr::map_dfr(.x = .,
                     .f = ~ broom::tidy(x = .),
                     .id = "group") %>% # remove intercept terms
      dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
      dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
      dplyr::select(
        .data = .,
        group,
        formula,
        term,
        estimate,
        std.error,
        t = statistic,
        p.value
      ) %>% # convert to a tibble dataframe
      tibble::as_data_frame(x = .)

    # return the dataframe
    return(results_df)
  }

  # check if the function works
  group_mtcars <- split(mtcars, mtcars$am)
  fn_results <- purrr::pmap(.l = list(
    l = list(group_mtcars),
    x_name = list('wt', 'disp'),
    y_name = list('mpg', 'drat')
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  # seems to be working!
  cat(paste("the custom function seems to be working!: \n"))
  print(fn_results)

  #========= using  custom function on entered dataframe =================
  cat(paste("running the custom function on the entered dataframe: \n"))
  # running custom function for each element of the created list column

  df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = pred.vars,
    y_name = crit.vars
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)

}

-run the function

国营的函数

res <- grouped_lm(
  data = iris,
  crit.vars = c(Sepal.Length, Petal.Length),
  pred.vars = c(Sepal.Width, Petal.Width),
  grouping.vars = Species
)

-output printed

与产出印刷

#the entire nested dataframe: 
# A tibble: 3 x 2
#  Species    data             
#  <fctr>     <list>           
#1 setosa     <tibble [50 x 4]>
#2 versicolor <tibble [50 x 4]>
#3 virginica  <tibble [50 x 4]>
#first element of the list column from nested dataframe: 
# A tibble: 50 x 4
#   Sepal.Length Petal.Length Sepal.Width Petal.Width
#          <dbl>        <dbl>       <dbl>       <dbl>
# 1         5.10         1.40        3.50       0.200
# 2         4.90         1.40        3.00       0.200
# 3         4.70         1.30        3.20       0.200
# 4         4.60         1.50        3.10       0.200
# 5         5.00         1.40        3.60       0.200
# 6         5.40         1.70        3.90       0.400
# 7         4.60         1.40        3.40       0.300
# 8         5.00         1.50        3.40       0.200
# 9         4.40         1.40        2.90       0.200
#10         4.90         1.50        3.10       0.100
# ... with 40 more rows
#the custom function seems to be working!: 
# A tibble: 4 x 7
#  group formula                   term        estimate std.error     t   p.value
#  <chr> <chr>                     <chr>          <dbl>     <dbl> <dbl>     <dbl>
#1 0     scale(mpg) ~ scale(wt)    scale(wt)     -0.768     0.155 -4.94 0.000125 
#2 1     scale(mpg) ~ scale(wt)    scale(wt)     -0.909     0.126 -7.23 0.0000169
#3 0     scale(drat) ~ scale(disp) scale(disp)   -0.614     0.192 -3.20 0.00520  
#4 1     scale(drat) ~ scale(disp) scale(disp)   -0.305     0.287 -1.06 0.312    
#running the custom function on the entered dataframe: 
# A tibble: 6 x 7
#  group formula                                  term               estimate std.error     t         p.value
#  <chr> <chr>                                    <chr>                 <dbl>     <dbl> <dbl>           <dbl>
#1 1     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.743    0.0967  7.68 0.000000000671 
#2 2     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.526    0.123   4.28 0.0000877      
#3 3     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.457    0.128   3.56 0.000843       
#4 1     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.332    0.136   2.44 0.0186         
#5 2     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.787    0.0891  8.83 0.0000000000127
#6 3     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.322    0.137   2.36 0.0225

-result output

结果输出

res
# A tibble: 6 x 7
#  group formula                                  term               estimate std.error     t         p.value
#  <chr> <chr>                                    <chr>                 <dbl>     <dbl> <dbl>           <dbl>
#1 1     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.743    0.0967  7.68 0.000000000671 
#2 2     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.526    0.123   4.28 0.0000877      
#3 3     scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width)    0.457    0.128   3.56 0.000843       
#4 1     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.332    0.136   2.44 0.0186         
#5 2     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.787    0.0891  8.83 0.0000000000127
#6 3     scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width)    0.322    0.137   2.36 0.0225

If we need to have the 'grouping.vars' also in the output

如果我们需要分组。vars也在输出中

grouped_lm <- function(data,
                       grouping.vars,
                       crit.vars,
                       pred.vars) {

  #================== preparing dataframe ==================
  #
  # check how many variables were entered for criterion variables vector
  crit.vars <-
    as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
  crit.vars <-
    if (length(crit.vars) == 1) {
      crit.vars
    } else {
      crit.vars[-1]
    }

  # check how many variables were entered for predictor variables vector
  pred.vars <-
    as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
  pred.vars <-
    if (length(pred.vars) == 1) {
      pred.vars
    } else {
      pred.vars[-1]
    }

  # check how many variables were entered for grouping variable vector
  grouping.vars <-
    as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
  grouping.vars <-
    if (length(grouping.vars) == 1) {
      grouping.vars
    } else {
      grouping.vars[-1]
    }

  # getting the dataframe ready
  df <- dplyr::select(.data = data,
                      !!!grouping.vars,
                      !!!crit.vars,
                      !!!pred.vars) %>%
    dplyr::group_by(.data = ., !!!grouping.vars) %>%
    tidyr::nest(data = .)

  # checking if the nested dataframe looks okay
  cat(paste("the entire nested dataframe: \n"))
  print(df)             # the entire nested dataframe
  cat(paste("first element of the list column from nested dataframe: \n"))
  print(df$data[[1]])   # first element of the list column

  #============== custom function ================

  # custom function to run linear regression for every element of a list for two variables
  lm_listed <- function(list.col, x_name, y_name) {
    fx <- glue::glue("scale({y_name}) ~ scale({x_name})")

    # this tags any names that are not predictor variables (used to remove intercept terms)
    filter_name  <- glue::glue("scale({x_name})")

    # dataframe with results from lm
    results_df <-
      list.col %>% # running linear regression on each individual group with purrr
      purrr::map(.x = .,
                 .f = ~ stats::lm(formula = as.formula(fx),
                                  data = (.))) %>% # tidying up the output with broom
      purrr::map_dfr(.x = .,
                     .f = ~ broom::tidy(x = .),
                     .id = "group") %>% # remove intercept terms
      dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
      dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
      dplyr::select(
        .data = .,
        group,
        formula,
        term,
        estimate,
        std.error,
        t = statistic,
        p.value
      ) %>% # convert to a tibble dataframe
      tibble::as_data_frame(x = .)

    # return the dataframe
    return(results_df)
  }

  # check if the function works
  group_mtcars <- split(mtcars, mtcars$am)
  fn_results <- purrr::pmap(.l = list(
    l = list(group_mtcars),
    x_name = list('wt', 'disp'),
    y_name = list('mpg', 'drat')
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  # seems to be working!
  cat(paste("the custom function seems to be working!: \n"))
  print(fn_results)

  #========= using  custom function on entered dataframe =================
  cat(paste("running the custom function on the entered dataframe: \n"))
  # running custom function for each element of the created list column

  df <- df %>%
          tibble::rownames_to_column('group')  
  df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = pred.vars,
    y_name = crit.vars
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows() %>%
    left_join(df) %>%
    select(!!!grouping.vars, everything()) %>%
    select(-group, -data)   



  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)

}

-run function

国营函数

r1 <- grouped_lm(
   data = mtcars,
   crit.vars = c(wt, mpg),
   pred.vars = c(drat, disp),
   grouping.vars = c(am, cyl)
 )

-output

与产出

r1
# A tibble: 12 x 8
#      am   cyl formula                  term        estimate std.error        t   p.value
#   <dbl> <dbl> <chr>                    <chr>          <dbl>     <dbl>    <dbl>     <dbl>
# 1  1.00  6.00 scale(wt) ~ scale(drat)  scale(drat)   -0.101     0.995 -  0.102   0.935  
# 2  1.00  4.00 scale(wt) ~ scale(drat)  scale(drat)   -0.226     0.398 -  0.568   0.591  
# 3  0     6.00 scale(wt) ~ scale(drat)  scale(drat)    0.307     0.673    0.456   0.693  
# 4  0     8.00 scale(wt) ~ scale(drat)  scale(drat)   -0.119     0.314 -  0.379   0.713  
# 5  0     4.00 scale(wt) ~ scale(drat)  scale(drat)    0.422     0.906    0.466   0.722  
# 6  1.00  8.00 scale(wt) ~ scale(drat)  scale(drat)   -1.00    NaN      NaN     NaN      
# 7  1.00  6.00 scale(mpg) ~ scale(disp) scale(disp)    1.00      0      Inf       0      
# 8  1.00  4.00 scale(mpg) ~ scale(disp) scale(disp)   -0.835     0.225 -  3.72    0.00991
# 9  0     6.00 scale(mpg) ~ scale(disp) scale(disp)    0.670     0.525    1.28    0.330  
#10  0     8.00 scale(mpg) ~ scale(disp) scale(disp)   -0.535     0.267 -  2.00    0.0729 
#11  0     4.00 scale(mpg) ~ scale(disp) scale(disp)    0.932     0.362    2.57    0.236  
#12  1.00  8.00 scale(mpg) ~ scale(disp) scale(disp)    1.00    NaN      NaN     NaN

#1

df_lm <- purrr::pmap(.l = list(
    l = list(df$data),
    x_name = map(pred.vars, quo_name),
    y_name = map(crit.vars, quo_name)
  ),
  .f = lm_listed) %>%
    dplyr::bind_rows()

  #============================== output ========================

  print(df_lm)
  # return the final dataframe with results
  return(df_lm)

}