What I need to do is to render a relatively huge isometric map on a mobile device (OpenGL ES2.0, Android 4.2). I'm using 1 VBO for the terrain, 1 VBO for the shadow layer, and 1 VBO for the objects layer. The performance is the issue, even with the very first VBO.
我需要做的是在移动设备上渲染一个相对巨大的等距地图(OpenGL ES2.0,Android 4.2)。我正在使用1个VBO用于地形,1个VBO用于阴影层,1个VBO用于对象层。即使是第一个VBO,性能也是问题。
Here we go, I have the data type for filling the VBO:
在这里,我有填充VBO的数据类型:
/* 2D vertex position held as a pair of GLshort values.
 * Sits at the start of TexturedVertex; the GL trace binds it as
 * attribute 0 with size = 2, type = GL_SHORT, offset 0. */
typedef struct _ovlndVertex
{
GLshort x; /* x coordinate of the vertex */
GLshort y; /* y coordinate of the vertex */
} ovlndVertex;
/* Texture coordinate pair held as two GLfloat values.
 * Uploaded as a 2-component GL_FLOAT attribute (offset 4 inside
 * TexturedVertex according to the GL trace). */
typedef struct _ovlndTexture {
GLfloat u; /* first texture coordinate */
GLfloat v; /* second texture coordinate */
} ovlndTexture;
/* One interleaved VBO element: position, texture coordinates and the id of
 * the texture the vertex samples from.
 * The accompanying text states sizeof(TexturedVertex) == 16, and the GL trace
 * uses stride 16 with attribute offsets 0 (position), 4 (texCoords) and
 * 0xc (textureId). Do not reorder or resize fields without updating the
 * matching glVertexAttribPointer calls.
 * NOTE(review): 4 + 8 + 2 bytes of fields implies 2 bytes of trailing
 * padding to reach 16 -- confirm on the target ABI. */
typedef struct _TexturedVertex
{
ovlndVertex vertices;   /* position (two GLshort) */
ovlndTexture texCoords; /* texture coordinates (two GLfloat) */
GLshort textureId;      /* texture selector read by the shaders as a_textureId.x */
} TexturedVertex;
Let's say we are rendering a 256x256 sized map, 4 vertices per tile, this gives us 256x256x4 = 262144 vertices in the VBO. Given the sizeof(TexturedVertex) == 16, this is 4194304 bytes of raw VBO data (around 4MB).
假设我们正在渲染一个256x256大小的地图,每个地块有4个顶点,这给我们VBO中的256x256x4 = 262144个顶点。给定sizeof(TexturedVertex)== 16,这是4194304字节的原始VBO数据(大约4MB)。
The main rendering job is done in the shaders. Here is their code:
主渲染作业在着色器中完成。这是他们的代码:
The vertex shader:
顶点着色器:
// Vertex shader for the tile layers.
// Transforms the tile vertex by the MVP matrix and forwards the texture
// coordinates and the per-vertex texture id to the fragment shader.

// Position; only the components supplied by the vertex array are read from
// the buffer, the remaining ones take the GL defaults (z = 0, w = 1).
attribute vec4 a_position;
// Texture coordinates of this vertex.
attribute vec2 a_texCoord;
//attribute vec4 a_color;
// Texture selector; only the .x component is used.
attribute vec2 a_textureId;
uniform mat4 u_MVPMatrix;
#ifdef GL_ES
//varying lowp vec4 v_fragmentColor;
// mediump instead of lowp: lowp only guarantees the range (-2, 2) with 2^-8
// absolute precision, which is too coarse to address individual texels of
// anything but very small textures.
varying mediump vec2 v_texCoord;
#else
//varying vec4 v_fragmentColor;
varying vec2 v_texCoord;
#endif
// Texture id passed through as a float (vertex shaders default to highp).
varying float v_textureId;
void main()
{
    gl_Position = u_MVPMatrix * a_position;
    // v_fragmentColor = a_color;
    v_texCoord = a_texCoord;
    v_textureId = a_textureId.x;
}
The fragment shader:
片段着色器:
// Fragment shader for the tile layers.
// Picks one of 16 bound textures according to the interpolated texture id
// and writes the sampled texel to gl_FragColor.
#ifdef GL_ES
// mediump (not lowp) for float/int: lowp float only guarantees the range
// (-2, 2), so a texture id in 0..15 (plus offset) and texture coordinates
// are not safely representable on hardware that really implements lowp.
// Sampled colours stay lowp.
precision mediump float;
precision mediump int;
precision lowp sampler2D;
#endif
//varying vec4 v_fragmentColor;
varying vec2 v_texCoord;
//uniform sampler2D u_texture0, u_texture1, u_texture2;
uniform sampler2D u_textures[16];
varying float v_textureId;
// Subtracted from the incoming texture id to obtain the slot in u_textures.
uniform int u_idxOffset;
void main()
{
    // Strange enough, but the binary search below works WAY faster than
    // plain texture array referencing by index like:
    //   int idx = int(v_textureId) - u_idxOffset;
    //   gl_FragColor = texture2D(u_textures[idx], v_texCoord);
    // (GLSL ES 1.00 also does not guarantee support for indexing sampler
    // arrays with a non-constant expression in fragment shaders.)
    //
    // Round to nearest before converting: the id arrives as an interpolated
    // float, and plain truncation would turn e.g. 2.9999 into 2.
    // Indices below 0 fall through to slot 0, indices above 15 to slot 15.
    // The vertex colour multiply (v_fragmentColor) is currently disabled.
    int index = int(floor(v_textureId + 0.5)) - u_idxOffset;
    if (index < 8) // 0-7
    {
        if (index < 4) // 0-3
        {
            if (index < 2) // 0-1
            {
                if (index < 1)
                {
                    gl_FragColor = texture2D(u_textures[0], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[1], v_texCoord);
                }
            }
            else // 2-3
            {
                if (index < 3)
                {
                    gl_FragColor = texture2D(u_textures[2], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[3], v_texCoord);
                }
            }
        }
        else // 4-7
        {
            if (index < 6) // 4-5
            {
                if (index < 5)
                {
                    gl_FragColor = texture2D(u_textures[4], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[5], v_texCoord);
                }
            }
            else // 6-7
            {
                if (index < 7)
                {
                    gl_FragColor = texture2D(u_textures[6], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[7], v_texCoord);
                }
            }
        }
    }
    else // 8-15
    {
        if (index < 12) // 8-11
        {
            if (index < 10) // 8-9
            {
                if (index < 9)
                {
                    gl_FragColor = texture2D(u_textures[8], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[9], v_texCoord);
                }
            }
            else // 10-11
            {
                if (index < 11)
                {
                    gl_FragColor = texture2D(u_textures[10], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[11], v_texCoord);
                }
            }
        }
        else // 12-15
        {
            if (index < 14) // 12-13
            {
                if (index < 13)
                {
                    gl_FragColor = texture2D(u_textures[12], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[13], v_texCoord);
                }
            }
            else // 14-15
            {
                if (index < 15)
                {
                    gl_FragColor = texture2D(u_textures[14], v_texCoord);
                }
                else
                {
                    gl_FragColor = texture2D(u_textures[15], v_texCoord);
                }
            }
        }
    }
}
Yes, I'm aware of OpenGL ES culling of the vertices outside the viewport, and it works perfectly. On a high zoom level it renders smoothly showing 60FPS on the device, however, if I zoom out to view all the map (this is the primary point) the FPS drops to 20-30 even without object rendering and to 4-5 with object rendering (2 additional similar VBOs).
是的,我知道OpenGL ES会剔除视口之外的顶点,而且这一点工作得很好。在高缩放级别下,设备上可以流畅地以60FPS渲染;但是,如果我缩小以查看整张地图(这是主要需求),即使不渲染对象,FPS也会下降到20-30,而加上对象渲染(另外2个类似的VBO)后则下降到4-5。
Here is the ADT's OpenGL ES Tracer output for a typical slow-to-render frame: (Wall Clock time: 0.49ms, Thread Time 0.37ms)
这是ADT的OpenGL ES Tracer输出,用于典型的慢渲染帧:(挂钟时间:0.49ms,线程时间0.37ms)
glClear(mask = 16640)
glUseProgram(program = 22)
glBindTexture(target = GL_TEXTURE_2D, texture = 2)
glActiveTexture(texture = GL_TEXTURE1)
glActiveTexture(texture = GL_TEXTURE2)
glActiveTexture(texture = GL_TEXTURE3)
glActiveTexture(texture = GL_TEXTURE4)
glActiveTexture(texture = GL_TEXTURE5)
glActiveTexture(texture = GL_TEXTURE6)
glActiveTexture(texture = GL_TEXTURE7)
glActiveTexture(texture = GL_TEXTURE8)
glActiveTexture(texture = GL_TEXTURE9)
glActiveTexture(texture = GL_TEXTURE10)
glActiveTexture(texture = GL_TEXTURE11)
glActiveTexture(texture = GL_TEXTURE12)
glUniform1i(location = 1, x = 0)
glBindBuffer(target = GL_ARRAY_BUFFER, buffer = 123)
glVertexAttribPointer(indx = 0, size = 2, type = GL_SHORT, normalized = false, stride = 16, ptr = 0x0)
glEnableVertexAttribArray(index = 1)
glEnableVertexAttribArray(index = 2)
glEnableVertexAttribArray(index = 0)
glEnableVertexAttribArray(index = 3)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 16, ptr = 0x0)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 16, ptr = 0x4)
glVertexAttribPointer(indx = 3, size = 1, type = GL_SHORT, normalized = false, stride = 16, ptr = 0xc)
glBindBuffer(target = GL_ELEMENT_ARRAY_BUFFER, buffer = 124)
glDrawElements(mode = GL_TRIANGLES, count = 469374, type = GL_UNSIGNED_INT, indices = 0x0)
glBindBuffer(target = GL_ARRAY_BUFFER, buffer = 0)
glBindBuffer(target = GL_ELEMENT_ARRAY_BUFFER, buffer = 0)
glActiveTexture(texture = GL_TEXTURE0)
glUseProgram(program = 7)
glDisableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x775470c0)
glVertexAttribPointer(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x775470e0)
glVertexAttribPointerData(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 8, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 16, ptr = 0x??, minIndex = 0, maxIndex = 4)
glBlendFunc(sfactor = GL_SRC_ALPHA, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUseProgram(program = 1)
glBlendFunc(sfactor = GL_LINES, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glBindTexture(target = GL_TEXTURE_2D, texture = 0)
glEnableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77530800)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77530810)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x7753080c)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x775295f8)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529608)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77529604)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529878)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529888)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77529884)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
eglSwapBuffers
FYI, here is the typical fast-rendering GUI frame (main menu actually, that gives me 60FPS): (Wall Clock time: 1.10MS, Thread Time 0.70MS)
仅供参考,这是典型的快速渲染GUI框架(主菜单实际上,它给我60FPS):(挂钟时间:1.10MS,线程时间0.70MS)
glClear(mask = 16640)
glUseProgram(program = 7)
glDisableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x77538880)
glVertexAttribPointer(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x775388a0)
glBlendFunc(sfactor = GL_SRC_ALPHA, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glVertexAttribPointerData(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 8, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 16, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUseProgram(program = 1)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, 0.093229175, -0.17300725, 0.0, 1.0])
glBlendFunc(sfactor = GL_LINES, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glBindTexture(target = GL_TEXTURE_2D, texture = 1)
glEnableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77546f08)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77546f18)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77546f14)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.23489577, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538cb0)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538cc0)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77538cbc)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.20677078, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538f00)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538f10)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77538f0c)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.15572912, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77539150)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77539160)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x7753915c)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.1057291, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x775393a0)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x775393b0)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x775393ac)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.0567708, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77537bc0)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77537bd0)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77537bcc)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -0.020312428, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77537e10)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77537e20)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77537e1c)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, 0.02239585, -0.17300725, 0.0, 1.0])
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538060)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77538070)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x7753806c)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUseProgram(program = 7)
glDisableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x77533090)
glVertexAttribPointerData(indx = 0, size = 2, type = GL_FLOAT, normalized = false, stride = 8, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 16, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_FLOAT, normalized = false, stride = 0, ptr = 0x775330b0)
glBlendFunc(sfactor = GL_SRC_ALPHA, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glUseProgram(program = 1)
glUniformMatrix4fv(location = 0, count = 1, transpose = false, value = [0.0010416667, 0.0, 0.0, 0.0, 0.0, 0.0018115942, 0.0, 0.0, 0.0, 0.0, -9.765625E-4, 0.0, -1.0, -1.0, 0.0, 1.0])
glBlendFunc(sfactor = GL_LINES, dfactor = GL_ONE_MINUS_SRC_ALPHA)
glBindTexture(target = GL_TEXTURE_2D, texture = 0)
glEnableVertexAttribArray(index = 2)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77530800)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77530810)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x7753080c)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x775295f8)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529608)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77529604)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
glVertexAttribPointer(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529878)
glVertexAttribPointer(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x77529888)
glVertexAttribPointer(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x77529884)
glVertexAttribPointerData(indx = 0, size = 3, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 1, size = 4, type = GL_UNSIGNED_BYTE, normalized = true, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glVertexAttribPointerData(indx = 2, size = 2, type = GL_FLOAT, normalized = false, stride = 24, ptr = 0x??, minIndex = 0, maxIndex = 4)
glDrawArrays(mode = GL_TRIANGLE_STRIP, first = 0, count = 4)
eglSwapBuffers
This renders the OpenGL ES Tracer useless for me, because the faster-rendering frame shows more time (both wall clock and thread) than the slow one. Does this mean the problem is hidden somewhere in the shaders? How do I profile them?
这使得OpenGL ES Tracer对我来说毫无用处,因为渲染较快的帧显示的时间(挂钟时间和线程时间)反而比渲染较慢的帧更长。这是否意味着问题隐藏在着色器的某处?我该如何对着色器进行性能分析?
What I tried so far:
到目前为止我尝试了什么:
- changed all the GLuint fields to GLshort to minimize the buffer size (this reduced the sizeof of the vertex struct from 28 to 16 bytes)
- played with GL_DYNAMIC_DRAW/GL_STATIC_DRAW method of sending the data to VBO (no difference to me)
- tried using lowp wherever I can
- tried single-textured rendering (no difference, this make me think the shaders are doing well)
- reduced the OpenGL calls to an absolute minimum (you can see the OGL call log above)
- removed all glGet* functions that could break the pipeline rendering
将所有GLuint更改为GLshort以最小化缓冲区大小(给了我28到16个字节的顶点struct sizeof优化)
使用GL_DYNAMIC_DRAW / GL_STATIC_DRAW方法将数据发送到VBO(对我来说没什么区别)
尝试使用lowp尽我所能
尝试单纹理渲染(没有区别,这让我觉得着色器做得很好)
将OpenGL调用减少到绝对最小值(您可以看到上面的OGL调用日志)
删除了可能破坏管道渲染的所有glGet *函数
Any advice on how I could optimize this?
有什么建议可以优化吗?
2 个解决方案
#1
0
You already did a good job. There is probably no more room for more micro optimisations.
你已经做得很好。可能没有更多的微观优化空间。
Because there are very few draw calls, using VAOs (vertex array objects) to remove glVertexAttribPointer from the rendering loop should not help much.
因为只有很少的drawcalls,使用VAO / VertexArrays从渲染循环中删除glVertexAttribPointer应该没什么用。
4 vertices per tile
每个图块4个顶点
Why are you using GL_TRIANGLE_STRIP and glDrawArrays ? glDrawElements with indices will give you more flexibility (see after...).
你为什么使用GL_TRIANGLE_STRIP和glDrawArrays?带索引的glDrawElements将为您提供更大的灵活性(请参阅...)。
20-30 even without object rendering and to 4-5 with object rendering (2 additional similar VBOs).
20-30甚至没有对象渲染,4-5与对象渲染(2个额外的类似VBO)。
Okay, so you end up with about 800k vertices, right? I faced a similar problem last year (1000k vertices). I finally chose to send more draw calls instead of using complex shaders. Yes, binding a lot of data (16 texture bindings!) slows down everything on this type of device.
好的。所以你最终得到大约800k顶点。对 ?去年我遇到过类似的问题(1000k顶点)。我最终选择发送更多的drawcalls,而不是使用复杂的着色器。是的,绑定大量数据(16个纹理绑定!)会降低此类设备上的所有内容。
if I zoom out to view all the map (this is the primary point) the FPS drops
如果我缩小以查看所有地图(这是主要点),FPS会下降
On an iPad 2, 1000k vertices on screen (with a lot more data per vertex than you) should render faster: more than 20 fps.
在iPad 2上,屏幕上的1000k顶点(每个顶点的数据比你多得多)应该渲染得更快:超过20 fps。
It appears that most of your data is static. You could try to build a VBO or a list of indices per texture (I don't know which is faster) and simplify your fragment shader a lot.
看起来你的大部分数据都是静态的。你可以尝试为每个纹理构建一个VBO或一个索引列表(我不知道哪个更快),并大幅简化你的片段着色器。
- bind a vbo with all "static data" (vertex positions, texture coordinates)
- for each texture
- bind the texture
- use glDrawElements with 'texture specific' list of indices.
绑定纹理
将glDrawElements与'特定于纹理'的索引列表一起使用。
用所有“静态数据”(顶点位置,纹理坐标)绑定vbo
对于每个纹理绑定纹理使用glDrawElements与'纹理特定'索引列表。
You can store indices in a buffer: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ...). Or, if they are dynamic, you can pass the indices directly to glDrawElements as a client-side pointer (with no element array buffer bound). It's probably faster than you think.
您可以将索引存储在缓冲区中:glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,...)。或者,如果索引是动态的,您可以在不绑定元素数组缓冲区的情况下,将索引作为客户端指针直接传递给glDrawElements。它可能比你想象的要快。
Last detail: lowp everywhere is dangerous. try mediump.
最后的细节:lowp到处都很危险。尝试mediump。
Good luck, you are touching the device limits !
祝你好运,你正在触及设备限制!
#2
0
Since the fast frame shows a higher time for the graphics trace, it's likely that something else is causing the slowdown. You should profile the CPU and see what your main and render threads are doing. Hopefully it will show the bottleneck.
由于快速帧显示图形跟踪的时间较长,因此其他可能导致速度减慢。您应该分析CPU并查看主要和渲染线程正在做什么。希望它会显示出瓶颈。
It would be useful to profile the CPU when you render a zoomed in view versus a zoomed out one, and see if anything stands out as a major difference between the two.
渲染放大视图与缩小视图时对CPU进行剖析非常有用,并查看是否有任何突出显示为两者之间的主要差异。
#1
0
You already did a good job. There is probably no more room for more micro optimisations.
你已经做得很好。可能没有更多的微观优化空间。
Because there are very few draw calls, using VAOs (vertex array objects) to remove glVertexAttribPointer from the rendering loop should not help much.
因为只有很少的drawcalls,使用VAO / VertexArrays从渲染循环中删除glVertexAttribPointer应该没什么用。
4 vertices per tile
每个图块4个顶点
Why are you using GL_TRIANGLE_STRIP and glDrawArrays ? glDrawElements with indices will give you more flexibility (see after...).
你为什么使用GL_TRIANGLE_STRIP和glDrawArrays?带索引的glDrawElements将为您提供更大的灵活性(请参阅...)。
20-30 even without object rendering and to 4-5 with object rendering (2 additional similar VBOs).
20-30甚至没有对象渲染,4-5与对象渲染(2个额外的类似VBO)。
Okay, so you end up with about 800k vertices, right? I faced a similar problem last year (1000k vertices). I finally chose to send more draw calls instead of using complex shaders. Yes, binding a lot of data (16 texture bindings!) slows down everything on this type of device.
好的。所以你最终得到大约800k顶点。对 ?去年我遇到过类似的问题(1000k顶点)。我最终选择发送更多的drawcalls,而不是使用复杂的着色器。是的,绑定大量数据(16个纹理绑定!)会降低此类设备上的所有内容。
if I zoom out to view all the map (this is the primary point) the FPS drops
如果我缩小以查看所有地图(这是主要点),FPS会下降
On an iPad 2, 1000k vertices on screen (with a lot more data per vertex than you) should render faster: more than 20 fps.
在iPad 2上,屏幕上的1000k顶点(每个顶点的数据比你多得多)应该渲染得更快:超过20 fps。
It appears that most of your data is static. You could try to build a VBO or a list of indices per texture (I don't know which is faster) and simplify your fragment shader a lot.
看起来你的大部分数据都是静态的。你可以尝试为每个纹理构建一个VBO或一个索引列表(我不知道哪个更快),并大幅简化你的片段着色器。
- bind a vbo with all "static data" (vertex positions, texture coordinates)
- for each texture
- bind the texture
- use glDrawElements with 'texture specific' list of indices.
绑定纹理
将glDrawElements与'特定于纹理'的索引列表一起使用。
用所有“静态数据”(顶点位置,纹理坐标)绑定vbo
对于每个纹理绑定纹理使用glDrawElements与'纹理特定'索引列表。
You can store indices in a buffer: glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ...). Or, if they are dynamic, you can pass the indices directly to glDrawElements as a client-side pointer (with no element array buffer bound). It's probably faster than you think.
您可以将索引存储在缓冲区中:glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,...)。或者,如果索引是动态的,您可以在不绑定元素数组缓冲区的情况下,将索引作为客户端指针直接传递给glDrawElements。它可能比你想象的要快。
Last detail: lowp everywhere is dangerous. try mediump.
最后的细节:lowp到处都很危险。尝试mediump。
Good luck, you are touching the device limits !
祝你好运,你正在触及设备限制!
#2
0
Since the fast frame shows a higher time for the graphics trace, it's likely that something else is causing the slowdown. You should profile the CPU and see what your main and render threads are doing. Hopefully it will show the bottleneck.
由于快速帧显示图形跟踪的时间较长,因此其他可能导致速度减慢。您应该分析CPU并查看主要和渲染线程正在做什么。希望它会显示出瓶颈。
It would be useful to profile the CPU when you render a zoomed in view versus a zoomed out one, and see if anything stands out as a major difference between the two.
渲染放大视图与缩小视图时对CPU进行剖析非常有用,并查看是否有任何突出显示为两者之间的主要差异。