R - 三元函数的矢量化实现

时间:2021-10-05 21:20:51

I have three vectors X, Y and Z of equal length n. I need to create an n x n x n array of a function f(X[i],Y[j],Z[k]). The straightforward way to do this is to sequentially loop through each element of each of the 3 vectors. However, the time required to compute the array grows exponentially with n. Is there a way to implement this using vectorized operations?

我有三个相等长度为n的向量X,Y和Z.我需要创建一个函数f(X [i],Y [j],Z [k])的n x n x n数组。直接的方法是顺序循环遍历3个向量中每个向量的每个元素。但是,计算阵列所需的时间随n呈指数增长。有没有办法使用矢量化操作来实现它?

EDIT: As mentioned in the comments, I have added a simple example of what's needed.

编辑:正如评论中所提到的,我添加了一个简单的例子来说明需要什么。

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim=c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

Thanks.

3 个解决方案

#1


6  

You can use nested outer :

你可以使用嵌套的外部:

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

F2 <- outer(X, outer(Y, Z, "+"), "*")

> identical(F, F2)
[1] TRUE

A microbenchmark including the expand.grid solution proposed by Nick K :

包含Nick K提出的expand.grid解决方案的微基准测试:

X = rnorm(100)
Y = seq(1:100)
Z = seq(101:200)

forLoop <- function(X, Y, Z) {
  F = array(0, dim = c( length(X),length(Y),length(Z) ) )
  for (i in 1:length(X))
    for (j in 1:length(Y))
      for (k in 1:length(Z))
        F[i,j,k] = X[i] * (Y[j] + Z[k])
  return(F)
}

nestedOuter <- function(X, Y, Z) {
  outer(X, outer(Y, Z, "+"), "*")
}

expandGrid <- function(X, Y, Z) {
  df <- expand.grid(X = X, Y = Y, Z = Z)
  G <- df$X * (df$Y + df$Z)
  dim(G) <- c(length(X), length(Y), length(Z))
  return(G)
}

library(microbenchmark)
mbm <- microbenchmark(
  forLoop = F1 <- forLoop(X, Y, Z), 
  nestedOuter = F2 <- nestedOuter(X, Y, Z), 
  expandGrid = F3 <- expandGrid(X, Y, Z), 
  times = 50L)

> mbm
Unit: milliseconds
expr         min         lq        mean      median          uq        max neval
forLoop 3261.872552 3339.37383 3458.812265 3388.721159 3524.651971 4074.40422    50
nestedOuter    3.293461    3.36810    9.874336    3.541637    5.126789   54.24087    50
expandGrid   53.907789   57.15647   85.612048   88.286431  103.516819  235.45443    50

#2


6  

Here's as an additional option, a possible Rcpp implementation (in case you like your loops). I wasn't able to outperform @Juliens solution though (maybe someone can), but they are more or less have the same timing

这是一个额外的选项,一个可能的Rcpp实现(如果你喜欢你的循环)。虽然我可能无法超越@Juliens解决方案(也许有人可以),但他们或多或少都有相同的时机

library(Rcpp)
cppFunction('NumericVector RCPP(NumericVector X,  NumericVector Y, NumericVector Z){

             int nrow = X.size(), ncol = 3, indx = 0;
             double temp(1) ;
             NumericVector out(pow(nrow, ncol)) ;
             IntegerVector dim(ncol) ;

             for (int l = 0; l < ncol; l++){
               dim[l] = nrow;
             }             

            for (int j = 0; j < nrow; j++) {
               for (int k = 0; k < nrow; k++) {
                     temp = Y[j] + Z[k] ;
                   for (int i = 0; i < nrow; i++) {
                         out[indx] = X[i] * temp ;
                         indx += 1 ;
                   }
               }
            }

            out.attr("dim") = dim;
            return out;
}')

Validating

identical(RCPP(X, Y, Z), F)
## [1] TRUE

A quick benchmark

一个快速的基准

set.seed(123)
X = rnorm(100)
Y = 1:100
Z = 101:200

nestedOuter <- function(X, Y, Z) outer(X, outer(Y, Z, "+"), "*")

library(microbenchmark)
microbenchmark( 
  nestedOuter = nestedOuter(X, Y, Z),  
  RCPP = RCPP(X, Y, Z),
  unit = "relative",
  times = 1e4)

# Unit: relative
#        expr      min       lq     mean   median       uq       max neval
# nestedOuter 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10000
#        RCPP 1.164254 1.141713 1.081235 1.100596 1.080133 0.7092394 10000

#3


2  

You could use expand.grid as follows:

您可以使用expand.grid,如下所示:

df <- expand.grid(X = X, Y = Y, Z = Z)
G <- df$X * (df$Y + df$Z)
dim(G) <- c(length(X), length(Y), length(Z))
all.equal(F, G)

If you had a vectorised function, this would work just as well. If not, you could use plyr::daply.

如果你有一个矢量化函数,这也可以。如果没有,你可以使用plyr :: daply。

#1


6  

You can use nested outer :

你可以使用嵌套的外部:

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

F2 <- outer(X, outer(Y, Z, "+"), "*")

> identical(F, F2)
[1] TRUE

A microbenchmark including the expand.grid solution proposed by Nick K :

包含Nick K提出的expand.grid解决方案的微基准测试:

X = rnorm(100)
Y = seq(1:100)
Z = seq(101:200)

forLoop <- function(X, Y, Z) {
  F = array(0, dim = c( length(X),length(Y),length(Z) ) )
  for (i in 1:length(X))
    for (j in 1:length(Y))
      for (k in 1:length(Z))
        F[i,j,k] = X[i] * (Y[j] + Z[k])
  return(F)
}

nestedOuter <- function(X, Y, Z) {
  outer(X, outer(Y, Z, "+"), "*")
}

expandGrid <- function(X, Y, Z) {
  df <- expand.grid(X = X, Y = Y, Z = Z)
  G <- df$X * (df$Y + df$Z)
  dim(G) <- c(length(X), length(Y), length(Z))
  return(G)
}

library(microbenchmark)
mbm <- microbenchmark(
  forLoop = F1 <- forLoop(X, Y, Z), 
  nestedOuter = F2 <- nestedOuter(X, Y, Z), 
  expandGrid = F3 <- expandGrid(X, Y, Z), 
  times = 50L)

> mbm
Unit: milliseconds
expr         min         lq        mean      median          uq        max neval
forLoop 3261.872552 3339.37383 3458.812265 3388.721159 3524.651971 4074.40422    50
nestedOuter    3.293461    3.36810    9.874336    3.541637    5.126789   54.24087    50
expandGrid   53.907789   57.15647   85.612048   88.286431  103.516819  235.45443    50

#2


6  

Here's as an additional option, a possible Rcpp implementation (in case you like your loops). I wasn't able to outperform @Juliens solution though (maybe someone can), but they are more or less have the same timing

这是一个额外的选项,一个可能的Rcpp实现(如果你喜欢你的循环)。虽然我可能无法超越@Juliens解决方案(也许有人可以),但他们或多或少都有相同的时机

library(Rcpp)
cppFunction('NumericVector RCPP(NumericVector X,  NumericVector Y, NumericVector Z){

             int nrow = X.size(), ncol = 3, indx = 0;
             double temp(1) ;
             NumericVector out(pow(nrow, ncol)) ;
             IntegerVector dim(ncol) ;

             for (int l = 0; l < ncol; l++){
               dim[l] = nrow;
             }             

            for (int j = 0; j < nrow; j++) {
               for (int k = 0; k < nrow; k++) {
                     temp = Y[j] + Z[k] ;
                   for (int i = 0; i < nrow; i++) {
                         out[indx] = X[i] * temp ;
                         indx += 1 ;
                   }
               }
            }

            out.attr("dim") = dim;
            return out;
}')

Validating

identical(RCPP(X, Y, Z), F)
## [1] TRUE

A quick benchmark

一个快速的基准

set.seed(123)
X = rnorm(100)
Y = 1:100
Z = 101:200

nestedOuter <- function(X, Y, Z) outer(X, outer(Y, Z, "+"), "*")

library(microbenchmark)
microbenchmark( 
  nestedOuter = nestedOuter(X, Y, Z),  
  RCPP = RCPP(X, Y, Z),
  unit = "relative",
  times = 1e4)

# Unit: relative
#        expr      min       lq     mean   median       uq       max neval
# nestedOuter 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10000
#        RCPP 1.164254 1.141713 1.081235 1.100596 1.080133 0.7092394 10000

#3


2  

You could use expand.grid as follows:

您可以使用expand.grid,如下所示:

df <- expand.grid(X = X, Y = Y, Z = Z)
G <- df$X * (df$Y + df$Z)
dim(G) <- c(length(X), length(Y), length(Z))
all.equal(F, G)

If you had a vectorised function, this would work just as well. If not, you could use plyr::daply.

如果你有一个矢量化函数,这也可以。如果没有,你可以使用plyr :: daply。