Cesium原理篇：6 Render模块(6: Instance实例化)

最近研究Cesium的实例化，尽管该技术需要在WebGL2.0，也就是OpenGL ES3.0才支持。调试源码的时候眼前一亮，发现VAO和glDrawBuffers都不是WebGL1.0的标准函数，都是扩展功能，看来WebGL2.0标准的推广势在必行啊。同时发现，通过ANGLE_instanced_arrays的扩展，也可以在WebGL1.0下实现实例化，创建实例化方法的代码如下：

// Builds three instancing wrappers (drawElementsInstanced, drawArraysInstanced,
// vertexAttribDivisor) and stores them on `this`, so callers use one API whether
// the calls go to the WebGL 2.0 context or to the ANGLE_instanced_arrays extension.
// If neither path is taken, the three wrappers remain undefined.
var glDrawElementsInstanced;

var glDrawArraysInstanced;

var glVertexAttribDivisor;

var instancedArrays;

// WebGL 2.0 exposes the instancing entry points directly on the context

if (webgl2) {

    glDrawElementsInstanced = function(mode, count, type, offset, instanceCount) {

         gl.drawElementsInstanced(mode, count, type, offset, instanceCount);

    };

    glDrawArraysInstanced = function(mode, first, count, instanceCount) {

        gl.drawArraysInstanced(mode, first, count, instanceCount);

    };

    glVertexAttribDivisor = function(index, divisor) {

        gl.vertexAttribDivisor(index, divisor);

    };

} else {

    // Under WebGL 1.0

    // fall back to the ANGLE_instanced_arrays extension

    instancedArrays = getExtension(gl, ['ANGLE_instanced_arrays']);

    if (defined(instancedArrays)) {

        glDrawElementsInstanced = function(mode, count, type, offset, instanceCount) {

            instancedArrays.drawElementsInstancedANGLE(mode, count, type, offset, instanceCount);

        };

        glDrawArraysInstanced = function(mode, first, count, instanceCount) {

            instancedArrays.drawArraysInstancedANGLE(mode, first, count, instanceCount);

        };

        glVertexAttribDivisor = function(index, divisor) {

            instancedArrays.vertexAttribDivisorANGLE(index, divisor);

        };

    }

}

// The three instancing-related methods

this.glDrawElementsInstanced = glDrawElementsInstanced;

this.glDrawArraysInstanced = glDrawArraysInstanced;

this.glVertexAttribDivisor = glVertexAttribDivisor;

// True only when the extension object was obtained; `instancedArrays` is never
// assigned in the WebGL 2.0 branch above, so this flag is false there.
this._instancedArrays = !!instancedArrays;

通过这样的封装，Cesium.Context提供了标准的实例化方法，不需要用户过多的关心WebGL标准的差异。而实例化的渲染也非常简单，核心代码如下：

/**
 * Issues the GL draw call for a draw command, choosing between the regular and
 * the instanced entry point based on drawCommand.instanceCount.
 *
 * This is an abridged excerpt: primitiveType, offset, count, va and indexBuffer
 * are set up by code that is elided here.
 *
 * @param {Object} context Rendering context; provides _gl and the instanced draw wrappers.
 * @param {Object} drawCommand Draw command; instanceCount === 0 means a non-instanced draw.
 */
function continueDraw(context, drawCommand) {
    // ... (elided in this excerpt)

    var instanceCount = drawCommand.instanceCount;

    if (defined(indexBuffer)) {
        // Indexed draw: convert the offset to a byte offset into the index buffer
        offset = offset * indexBuffer.bytesPerIndex;
        count = defaultValue(count, indexBuffer.numberOfIndices);

        if (instanceCount === 0) {
            context._gl.drawElements(primitiveType, count, indexBuffer.indexDatatype, offset);
        } else {
            context.glDrawElementsInstanced(primitiveType, count, indexBuffer.indexDatatype, offset, instanceCount);
        }
    } else {
        // Non-indexed draw: default to every vertex of the vertex array
        count = defaultValue(count, va.numberOfVertices);

        if (instanceCount === 0) {
            context._gl.drawArrays(primitiveType, offset, count);
        } else {
            context.glDrawArraysInstanced(primitiveType, offset, count, instanceCount);
        }
    }

    // ... (elided in this excerpt)
}

是否实例化渲染，取决于你所构造的DrawCommand是否有实例化的信息，对应代码中的drawCommand.instanceCount，如果你的实例化数目不为零，则进行实例化的渲染。因此，Context中对实例化进行了封装，内部的渲染机制中，实例化和非实例化的渲染机制差别并不大。从应用的角度来看，我们并不需要关心Context的实现，而是通过构造DrawCommand来决定是否想要实例化渲染。

之前我们较详细的介绍过Renderer.DrawCommand模块，如果不清楚的回去再翻翻看，在VertexArray中实现了VAO中创建attr.vertexAttrib，这里有一个instanceDivisor属性，这就是用来表示该attribute是否是实例化的divisor：

// Binds this attribute's vertex buffer, describes its layout to GL and enables it.
// When the attribute has a positive instanceDivisor, the divisor is also applied
// through the context and recorded on it.
attr.vertexAttrib = function(gl) {
    var location = this.index;

    gl.bindBuffer(gl.ARRAY_BUFFER, this.vertexBuffer._getBuffer());
    gl.vertexAttribPointer(
        location,
        this.componentsPerAttribute,
        this.componentDatatype,
        this.normalize,
        this.strideInBytes,
        this.offsetInBytes
    );
    gl.enableVertexAttribArray(location);

    // No positive divisor: nothing instancing-specific to do
    if (!(this.instanceDivisor > 0)) {
        return;
    }

    context.glVertexAttribDivisor(location, this.instanceDivisor);
    context._vertexAttribDivisors[location] = this.instanceDivisor;
    context._previousDrawInstanced = true;
};

根据OpenGL的定义，glVertexAttribDivisor用于设置多实例渲染时，顶点着色器中位于index位置的顶点属性如何分配值到每个实例。instanceDivisor如果是0，那该属性的多实例特性将被禁用，其他值则表示顶点着色器中，每instanceDivisor个实例会分配一个新的属性值。

可见，对于一个DrawCommand，实例化有三处特别的地方，一个是attribute的instanceDivisor属性，用来确定实例化的频率，一个是instanceCount，实例化的个数，最后一个，当然是顶点着色器了，attribute属性传到顶点着色器了，你得用才有效果啊。因为实例化的自由度很高，所以多数情况下需要你自己写。

当然，目前Cesium用实例化的地方不多，只有BillboardCollection和3D Tiles中用到了，提供了完整的实现方法，不妨看看Cesium自己的调用方式，学习一下Cesium中如何使用实例化的。

ModelInstanceCollection

我们先看一下3D Tiles中实例化的实现方式，这个较为简单，因为3D Tiles中的数据都是预处理的，可以直接加载，另外因为模型是gltf，自带Shader，不需要过多的逻辑判断。

Cesium原理篇：6 Render模块(6: Instance实例化)

上面是一个3D Tiles实例化的效果图，可见除了位置不同，其他的都一致。实例化正是避免相同属性之间的内存和显存的调度，同时对不同的属性的调度进行优化，从而提高渲染效率。我们先看一下数据处理的代码：

/**
 * Packs the per-instance transforms of a collection into a flat Float32Array,
 * 12 floats per instance: the first three rows of the instance's model matrix,
 * with the translation made relative to the collection center.
 *
 * @param {Object} collection Instance collection; reads _instances, length and _center.
 * @param {Object} context Rendering context (not used by this function).
 * @param {Float32Array} [result] Array to write into; allocated when not provided.
 * @returns {Float32Array} The packed instance data (the result parameter when given).
 */
function getVertexBufferData(collection, context, result) {
    var instances = collection._instances;
    var instancesLength = collection.length;
    var collectionCenter = collection._center;
    var vertexSizeInFloats = 12;

    if (!defined(result)) {
        result = new Float32Array(instancesLength * vertexSizeInFloats);
    }

    for (var i = 0; i < instancesLength; ++i) {
        var modelMatrix = instances[i].modelMatrix;

        // Instance matrix is relative to center; work on a scratch copy so the
        // instance's own modelMatrix is left untouched
        var instanceMatrix = Matrix4.clone(modelMatrix, scratchMatrix);
        instanceMatrix[12] -= collectionCenter.x;
        instanceMatrix[13] -= collectionCenter.y;
        instanceMatrix[14] -= collectionCenter.z;

        var offset = i * vertexSizeInFloats;

        // First three rows of the model matrix
        result[offset + 0] = instanceMatrix[0];
        result[offset + 1] = instanceMatrix[4];
        result[offset + 2] = instanceMatrix[8];
        result[offset + 3] = instanceMatrix[12];
        result[offset + 4] = instanceMatrix[1];
        result[offset + 5] = instanceMatrix[5];
        result[offset + 6] = instanceMatrix[9];
        result[offset + 7] = instanceMatrix[13];
        result[offset + 8] = instanceMatrix[2];
        result[offset + 9] = instanceMatrix[6];
        result[offset + 10] = instanceMatrix[10];
        result[offset + 11] = instanceMatrix[14];
    }

    return result;
}

代码有点长，但不难理解，instancesLength是要进行实例化的实例个数，collectionCenter则是这些实例Collection的中心点，以前这些实例中都保存的是相对球心的模型矩阵，这样构建instancesLength个DrawCommand，最终渲染到FBO中。但发现，这些实例基本一样啊，以前是一笔一划的渲染出来，不如先弄一个印章，然后啪啪啪的盖在不同的位置就可以了，这样多快啊。所以现在要对他进行实例化的改造。这样，当我在collectionCenter位置构造了一个实例（印章），你们告诉我距离中心点的偏移量，我就知道在哪里直接“盖”这个实例了。所以，数据上，我们需要把这个矩阵改为相对collectionCenter的，getVertexBufferData就是做这个事情。接着在createVertexBuffer中我们将这个矩阵数据构建成一个VertexBuffer：

/**
 * Packs the collection's per-instance matrices and stores them in a vertex
 * buffer on collection._vertexBuffer.
 *
 * NOTE(review): `dynamic` is not declared in this excerpt; presumably it is
 * derived from the collection in elided code. Confirm against the full source.
 *
 * @param {Object} collection Instance collection that receives _vertexBuffer.
 * @param {Object} context Rendering context the buffer is created for.
 */
function createVertexBuffer(collection, context) {
    var instanceData = getVertexBufferData(collection, context);

    var bufferUsage;
    if (dynamic) {
        bufferUsage = BufferUsage.STREAM_DRAW;
    } else {
        bufferUsage = BufferUsage.STATIC_DRAW;
    }

    collection._vertexBuffer = Buffer.createVertexBuffer({
        context : context,
        typedArray : instanceData,
        usage : bufferUsage
    });
}

这样，当我们创建好适合实例化的VertexBuffer后，就可以封装实例化的属性：

/**
 * Creates the Model that backs an instance collection and stores it on
 * collection._model. When instancing is supported, the per-instance matrix
 * buffer is created and exposed to the Model as three vec4 attributes
 * (czm_modelMatrixRow0..2), each advancing once per instance.
 *
 * @param {Object} collection Instance collection; reads _instancingSupported, writes _model.
 * @param {Object} context Rendering context used to create the vertex buffer.
 */
function createModel(collection, context) {
    var instancingSupported = collection._instancingSupported;

    // Must be an object before precreatedAttributes is attached below.
    // (The remaining Model options are elided in this excerpt.)
    var modelOptions = {};

    if (instancingSupported) {
        createVertexBuffer(collection, context);

        // Each instance stores three rows of four floats, matching the
        // 12-float layout written by getVertexBufferData
        var vertexSizeInFloats = 12;
        var componentSizeInBytes = ComponentDatatype.getSizeInBytes(ComponentDatatype.FLOAT);

        var instancedAttributes = {
            czm_modelMatrixRow0 : {
                index : 0, // updated in Model
                vertexBuffer : collection._vertexBuffer,
                componentsPerAttribute : 4,
                componentDatatype : ComponentDatatype.FLOAT,
                normalize : false,
                offsetInBytes : 0,
                strideInBytes : componentSizeInBytes * vertexSizeInFloats,
                instanceDivisor : 1
            },
            czm_modelMatrixRow1 : {
                index : 0, // updated in Model
                vertexBuffer : collection._vertexBuffer,
                componentsPerAttribute : 4,
                componentDatatype : ComponentDatatype.FLOAT,
                normalize : false,
                offsetInBytes : componentSizeInBytes * 4,
                strideInBytes : componentSizeInBytes * vertexSizeInFloats,
                instanceDivisor : 1
            },
            czm_modelMatrixRow2 : {
                index : 0, // updated in Model
                vertexBuffer : collection._vertexBuffer,
                componentsPerAttribute : 4,
                componentDatatype : ComponentDatatype.FLOAT,
                normalize : false,
                offsetInBytes : componentSizeInBytes * 8,
                strideInBytes : componentSizeInBytes * vertexSizeInFloats,
                instanceDivisor : 1
            }
        };

        modelOptions.precreatedAttributes = instancedAttributes;
    }

    collection._model = new Model(modelOptions);
}

这里稍微有一些麻烦，将矩阵分解为3个vec4的attribute，分别对应czm_modelMatrixRow0~2，这里可以看到，每一个实例化的属性中，instanceDivisor值为1，也就是一个实例更新一次，正好对应每一个实例的偏移量。最后构造成Model(_precreatedAttributes)。

该Model实际上就是一个实例集合，在Model.update()中调用createVertexArrays方法创建VAO。这样完成了一个arraybuffer（内存）->VertexBuffer（显存）->Attributes->VertexArray的整个过程。最后绑定到DrawCommand中进行渲染。整个流程大概如下：

// Rough outline of the per-frame update of a ModelInstanceCollection: on first
// use it creates the model, then updates the model every frame and builds the
// draw commands.
// NOTE(review): this is an abridged excerpt. `context`, `instancingSupported` and
// `modelCommands` are not declared here; presumably they come from elided code.
// Confirm against the full source.
ModelInstanceCollection.prototype.update = function(frameState) {

    if (this._state === LoadState.NEEDS_LOAD) {

        this._state = LoadState.LOADING;

        this._instancingSupported = context.instancedArrays;

        // Prepare the data in the layout instancing needs

        createModel(this, context);

    } 

    var model = this._model;

    // Create the VAO

    model.update(frameState);

    if (instancingSupported) {

        // Build the final DrawCommand,

        // set its instanceCount

        // and bind the VertexArray

        createCommands(this, modelCommands.draw, modelCommands.pick);

    }

}

这样，一个实例化的DrawCommand完成，以前需要Count个DrawCommand渲染的过程，只需要一个DrawCommand一次性渲染instanceCount个实例即可。当然，这里没有给出3D Instance Tiles的顶点着色器代码，只好自己想象这样一个转换代码：a_position为原点，通过czm_modelMatrixRow0，czm_modelMatrixRow1，czm_modelMatrixRow2三个相对原点的偏移矩阵构造出czm_instanced_modelView模型视图矩阵，最终结合投影矩阵计算出gl_Position。

Billboard:

这里主要介绍Billboard中实例化的设计和封装，至于Billboard的整个过程，我们后续在介绍DataSource模块时再详细介绍。首先，我们要自己明白，对Billboard进行实例化渲染的意义在哪里，在目标明确的基础下，我们才能总结这些实例之间的共同点和不同点，方能更好的设计：哪些属性需要实例化，哪些属性不需要。

大家可以自己思考一下，再往下看。因为这个涉及到对Billboard的理解，本篇主要集中在实例化上面，所以，直接给出Cesium的设计。

// Vertex attribute locations (indices 0-8) used for instanced billboards.
var attributeLocationsInstanced = {

    direction : 0,

    positionHighAndScale : 1,

    positionLowAndRotation : 2,

    // texture offset in w (presumably describes positionLowAndRotation above; the comment looks displaced by one line, confirm)

    compressedAttribute0 : 3,

    compressedAttribute1 : 4,

    compressedAttribute2 : 5,

    eyeOffset : 6,

    // texture range in w (presumably describes eyeOffset above; the comment looks displaced by one line, confirm)

    scaleByDistance : 7,

    pixelOffsetScaleByDistance : 8

};

如上是Cesium中Billboard需要的attribute属性，对每一个实例而言，direction都是一样的，而其他八个属性则不同。direction是公告板的四个顶点的相对位置（比例），对于所有公告板，这四个顶点之间的相对位置是一样的，就好比一个印章，你只需要缩放一下相对位置就可以改变整体大小，移动一下位置就可以改变整体位置，旋转也是如此。无论怎么变，Billboard的样式都不会走样。因此，在BillboardCollection中，会默认创建唯一的公告板的direction：

/**
 * Returns the index buffer shared by all instanced billboards, creating it and
 * caching it on context.cache the first time it is requested.
 *
 * The six indices split the four-corner quad into the triangles (0, 1, 2) and (0, 2, 3).
 *
 * @param {Object} context Rendering context; its cache holds the shared buffer.
 * @returns {Object} The cached index buffer.
 */
function getIndexBufferInstanced(context) {
    var indexBuffer = context.cache.billboardCollection_indexBufferInstanced;
    if (defined(indexBuffer)) {
        return indexBuffer;
    }

    indexBuffer = Buffer.createIndexBuffer({
        context : context,
        typedArray : new Uint16Array([0, 1, 2, 0, 2, 3]),
        usage : BufferUsage.STATIC_DRAW,
        indexDatatype : IndexDatatype.UNSIGNED_SHORT
    });

    // Flag the shared buffer so that vertex arrays using it do not destroy it
    // (inferred from the property name; TODO confirm)
    indexBuffer.vertexArrayDestroyable = false;
    context.cache.billboardCollection_indexBufferInstanced = indexBuffer;
    return indexBuffer;
}

/**
 * Returns the vertex buffer holding the four quad corners shared by all
 * instanced billboards. The buffer is created on first use and kept on
 * context.cache afterwards.
 *
 * @param {Object} context Rendering context; its cache holds the shared buffer.
 * @returns {Object} The cached vertex buffer.
 */
function getVertexBufferInstanced(context) {
    var cached = context.cache.billboardCollection_vertexBufferInstanced;

    if (!defined(cached)) {
        // Corner coordinates (0,0), (1,0), (1,1), (0,1), two floats per corner
        var quadCorners = new Float32Array([0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);

        cached = Buffer.createVertexBuffer({
            context : context,
            typedArray : quadCorners,
            usage : BufferUsage.STATIC_DRAW
        });
        cached.vertexArrayDestroyable = false;

        context.cache.billboardCollection_vertexBufferInstanced = cached;
    }

    return cached;
}

如上，大家可以想象一个矩形（Billboard），中间画一条对角线分成了两个相接的三角形，小学几何里面说过三角形的稳定性，因此，该矩形通过两个三角形确保了样式不变。我们先看看VertexBuffer，顶点数据为：[0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]，也就是Billboard的四个顶点，顶点索引为[0, 1, 2, 0, 2, 3]，把四个点分成了两个三角形（0, 1, 2,）和（0, 2, 3）。这样，我们通过indexBuffer和vertexBuffer，构建了一个Billboard样式，并将它保存在context.cache下，分别是billboardCollection_indexBufferInstanced和billboardCollection_vertexBufferInstanced，作为一个全局的单例。

就好比百米决赛，每个运动员都在高速奔跑，而摄像机也需要实时调整位置，保持一个最佳角度捕捉运动员的动作。一个公告板的样式确定了，但位置、角度以及公告板的大小，每个Billboard都会不同。因此这些属性就是需要实例化的部分，并且这些属性值（Buffer）需要实时的更新。

/**
 * Creates the VertexArrayFacade that holds the billboard vertex attributes.
 *
 * The per-billboard attributes are declared without a vertex buffer; their
 * values are written later. In instanced mode the shared `direction` attribute
 * is added with a pre-created vertex buffer.
 *
 * This is an abridged excerpt: some attribute entries are elided.
 *
 * @param {Object} context Rendering context.
 * @param {Number} numberOfBillboards Number of billboards in the collection.
 * @param {Array} buffersUsage Buffer usage per attribute group, indexed by the *_INDEX constants.
 * @param {Boolean} instanced Whether billboards are rendered with instancing.
 * @returns {VertexArrayFacade} The facade sized for the billboards.
 */
function createVAF(context, numberOfBillboards, buffersUsage, instanced) {
    // Per-billboard attributes (the ones that get instanced)
    var attributes = [
    {
        index : attributeLocations.positionHighAndScale,
        componentsPerAttribute : 4,
        componentDatatype : ComponentDatatype.FLOAT,
        usage : buffersUsage[POSITION_INDEX]
    },
    {
        index : attributeLocations.positionLowAndRotation,
        componentsPerAttribute : 4,
        componentDatatype : ComponentDatatype.FLOAT,
        usage : buffersUsage[POSITION_INDEX]
    },
    // ... (further attributes elided in this excerpt)
    {
        index : attributeLocations.pixelOffsetScaleByDistance,
        componentsPerAttribute : 4,
        componentDatatype : ComponentDatatype.FLOAT,
        usage : buffersUsage[PIXEL_OFFSET_SCALE_BY_DISTANCE_INDEX]
    }];

    // direction is not instanced: it comes from the shared pre-created vertex buffer
    if (instanced) {
        attributes.push({
            index : attributeLocations.direction,
            componentsPerAttribute : 2,
            componentDatatype : ComponentDatatype.FLOAT,
            vertexBuffer : getVertexBufferInstanced(context)
        });
    }

    // Instanced: one entry per billboard (the instance count).
    // Otherwise: four vertices per billboard.
    var sizeInVertices = instanced ? numberOfBillboards : 4 * numberOfBillboards;

    return new VertexArrayFacade(context, attributes, sizeInVertices, instanced);
}

createVAF创建了结构体attributeLocationsInstanced所需要的所有属性，也是渲染每一个Billboard实例时，在顶点着色器中需要的attribute属性，这里主要有三个关键点：（1）只有direction属性创建了vertexBuffer，而其他八个属性是空的，需要实时的更新属性值，也是需要实例化的属性；（2）确定了instanceCount，也就是sizeInVertices；（3）最终Billboard所有attribute属性（实例化和不需要实例化的direction）都交给了VertexArrayFacade。前两点都很明确，现在就看VertexArrayFacade到底干了什么。

还是先思考一下，attribute都已经准备好了，下来应该是CreateVertexArray的过程了，而这里有两处不同：（1）实例化所需要的八个属性并没有VertexBuffer，需要一个机制对所有实例更新这八个属性值；（2）属性中有需要实例化的，需要在attribute中将instanceDivisor属性标识为1，而direction则不需要实例化（instanceDivisor为0）。因此不难理解，VertexArrayFacade就是BillboardCollection和VertexArray之间的一个过渡，用来解决上面的两个问题。

/**
 * Constructor excerpt: splits the verified attributes into two groups.
 * Attributes that already carry a vertexBuffer go to precreatedAttributes;
 * all others are grouped by their usage in attributesByUsage so their values
 * can be written later.
 *
 * NOTE(review): precreatedAttributes, attributesByUsage, usage and
 * attributesForUsage are not declared in this excerpt; presumably they are
 * declared in elided code. Confirm against the full source.
 *
 * @param {Object} context Rendering context.
 * @param {Array} attributes Attribute descriptions to partition.
 * @param {Number} sizeInVertices Number of vertices (or instances) to allocate for.
 * @param {Boolean} instanced Whether the attributes are instanced.
 */
function VertexArrayFacade(context, attributes, sizeInVertices, instanced) {
    var attrs = VertexArrayFacade._verifyAttributes(attributes);
    var length = attrs.length;

    for (var i = 0; i < length; ++i) {
        var attribute = attrs[i];

        // An attribute that already has a vertexBuffer (e.g. direction) is kept
        // as-is in precreatedAttributes and skipped by the usage grouping below
        if (attribute.vertexBuffer) {
            precreatedAttributes.push(attribute);
            continue;
        }

        // Attributes without a vertexBuffer are grouped by usage;
        // their values are assigned later
        usage = attribute.usage;
        attributesForUsage = attributesByUsage[usage];
        if (!defined(attributesForUsage)) {
            attributesForUsage = attributesByUsage[usage] = [];
        }

        attributesForUsage.push(attribute);
    }
}

如上对attribute根据是否需要实例化，进行了区分。然后在渲染时，在更新队列中更新数据：

// Outline of the billboard update: create the vertex array facade, write every
// billboard's attribute values through its writers, then commit.
// NOTE(review): this is an abridged excerpt. `billboardsLength`, `context`,
// `vafWriters`, `textureAtlasCoordinates` and `getIndexBuffer` are not declared
// here; presumably they come from elided code. Confirm against the full source.
BillboardCollection.prototype.update = function(frameState) {

    if (billboardsLength > 0) {

        // Create the attributes

        this._vaf = createVAF(context, billboardsLength, this._buffersUsage, this._instanced);

        vafWriters = this._vaf.writers;

        // When the data changed, the instanced attribute values must be rewritten

        for (var i = 0; i < billboardsLength; ++i) {

            var billboard = this._billboards[i];

            billboard._dirty = false;

            writeBillboard(this, context, textureAtlasCoordinates, vafWriters, billboard);

        }

        // Create the instanced VAO; the same index buffer, i.e. the same quad shape, is used

        this._vaf.commit(getIndexBuffer(context));

    }

}

这里，writeBillboard通过vafWriters方法，将实例化的属性值写入到arraybuffer中，这里就不详细介绍过程了。简单说就三个过程：创建，写，提交。首先在VertexArrayFacade初始化中，最终会调用_resize，这里虽然并不知道实例化attribute属性的值，但所占内存的大小是明确的，所以会在内存中创建一个属性值均为0的arraybuffer。然后在createWriters中实现了写的方法，VertexArrayFacade通过闭包的方式绑定到writers属性中，BillboardCollection中对应：vafWriters = this._vaf.writers，实现属性值的写操作。最后，通过commit提交，创建VAO，将内存中的Buffer传递到显存中。

// Outline of commit: commit every buffer, build the attribute list (instanced
// plus pre-created attributes) and create the vertex array with the given index buffer.
// NOTE(review): this is an abridged excerpt. `i`, `length`, `allBuffers`, `buffer`,
// `recreateVA`, `attributes`, `offset`, `va`, `k` and `numberOfVertexArrays` are not
// declared here; presumably they come from elided code. Confirm against the full source.
VertexArrayFacade.prototype.commit = function(indexBuffer){

    for (i = 0, length = allBuffers.length; i < length; ++i) {

        buffer = allBuffers[i];

        // Create the VertexBuffer,

        // uploading the attribute values written into the array buffer to GPU memory

        recreateVA = commit(this, buffer) || recreateVA;

    }

    // Create the attributes and mark them as instanced

    // instanceDivisor : instanced ? 1 : 0

    VertexArrayFacade._appendAttributes(attributes, buffer, offset, this._instanced); 

    // Append the pre-created, non-instanced attributes

    attributes = attributes.concat(this._precreated); 

    // Create the VAO

    va.push({

        va : new VertexArray({

            context : this._context,

            attributes : attributes, indexBuffer : indexBuffer

        }),

        indicesCount : 1.5 * ((k !== (numberOfVertexArrays - 1)) ? (CesiumMath.SIXTY_FOUR_KILOBYTES - 1) : (this._size % (CesiumMath.SIXTY_FOUR_KILOBYTES - 1)))

    });

}

如上，完成了BillboardCollection中VAO的创建。最后一步，就是顶点着色器中如何使用这些属性，这里主要看一个思路，看一下实例化和非实例化之间的区别，以及如何配合：

// Excerpt of the billboard vertex shader helper: scales the image size and uses
// direction to pick the corner of the quad; the rest of the function is elided.
vec4 computePositionWindowCoordinates(vec4 positionEC, vec2 imageSize, float scale, vec2 direction, vec2 origin, vec2 translate, vec2 pixelOffset, vec3 alignedAxis, bool validAlignedAxis, float rotation, bool sizeInMeters)

{

    vec2 halfSize = imageSize * scale * czm_resolutionScale;

    // direction tells which of the quad's four corners the current vertex is

    // (upper-left, lower-left, lower-right or upper-right)

    // direction is identical for all instances, so this attribute is not instanced

    halfSize *= ((direction * 2.0) - 1.0); 

    // Below, the instanced attributes are used to compute the actual position of this vertex

    ……

}

总结

实例化是一个强大功能，但性能的提升往往需要跟数据紧密联系，需要有一个数据规范的前提，所以Cesium目前对实例化应用的地方并不多，但即使这样，Cesium也意识到必要性，即使WebGL1.0规范并不支持的情况下，也通过扩展的方式来支持。当然，最重要的是能够学习到Cesium对实例化的封装和应用，以及如何理解，哪些不同的attribute需要实例化。

秒客网

Cesium原理篇：6 Render模块(6: Instance实例化)

ModelInstanceCollection

Billboard:

总结

相关文章