Strange performance hit caused by a single for loop

I am currently writing an OpenGL 3.1 application (with GLSL version 330) on Linux (an NVIDIA 360M card with the 313.0 NVIDIA driver); the application is about 15 thousand lines long. My problem is that in one of my vertex shaders I experience sharp drops in performance when I make minimal changes to the code, changes which should actually be no-ops.

For example:

// Three alternative ways of writing uShadowCP[i] * w_pos into
// vout.shadowCoord[i] for i = 0..3. The fps figures are the frame rates the
// author reports for each alternative.

// Variant 1: loop that writes directly into the output array.
// With this solution my program runs with 3-5 fps
for(int i = 0; i < 4; ++i) {
  vout.shadowCoord[i] = uShadowCP[i] * w_pos;
}

// Variant 2: the same four assignments written out by hand.
// But with this it runs with 30+ fps
vout.shadowCoord[0] = uShadowCP[0] * w_pos;
vout.shadowCoord[1] = uShadowCP[1] * w_pos;
vout.shadowCoord[2] = uShadowCP[2] * w_pos;
vout.shadowCoord[3] = uShadowCP[3] * w_pos;

// Variant 3: compute into a local array first, then copy the local array
// into the output array with a second loop.
// This works with 30+ fps too
vec4 shadowCoords[4];
for(int i = 0; i < 4; ++i) {
  shadowCoords[i] = uShadowCP[i] * w_pos;
}
for(int i = 0; i < 4; ++i) {
  vout.shadowCoord[i] = shadowCoords[i];
}

Or consider this:

// Loop bound supplied as a uniform; the initializer gives it the value 4.
uniform int uNumUsedShadowMaps = 4; // edit: I called this "random_uniform" in the original question

// Variant A: the loop bound is min(uNumUsedShadowMaps, 4), so it is not a
// compile-time constant. Only the first min(uNumUsedShadowMaps, 4) elements
// are written.
// 8 fps
for(int i = 0; i < min(uNumUsedShadowMaps, 4); ++i) {
    vout.shadowCoord[i] = vec4(1.0);
}

// Variant B: constant trip count of 4 with the uniform tested inside the
// body. Unlike variant A, every element is written: vec4(1.0) where
// i < uNumUsedShadowMaps, vec4(0.0) otherwise.
// 30+ fps
for(int i = 0; i < 4; ++i) {
  if(i < uNumUsedShadowMaps) {
    vout.shadowCoord[i] = vec4(1.0);
  } else {
    vout.shadowCoord[i] = vec4(0.0);
  }
}

See all the shader code here for this problem: http://pastebin.com/LK5CNJPD

Any idea about what might trigger these slowdowns would be appreciated.

+4
source share
2 answers

Finally, I managed to find the cause of the problem, and also find a solution to it.

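But before jumping to the solution, let me paste the most minimal shader code with which I could reproduce this "bug".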

Vertex Shader:

#version 330 

// Minimal vertex shader used to reproduce the slowdown.
// NOTE(review): uShadowCP is used in main() but never declared in this
// snippet; presumably something like `uniform mat4 uShadowCP[4];` was
// omitted -- confirm against the full shader.

vec3 CountPosition(); // Irrelevant how it is implemented.

uniform mat4 uProjectionMatrix, uCameraMatrix;

// Interface block passed on to the next stage; the fragment shader shown
// with this answer declares a matching `in VertexData` block.
out VertexData {
    vec3 c_pos, w_pos;
    vec4 shadowCoord[4];
} vout;

void main() {
    // The result is stored directly in the output block...
    vout.w_pos = CountPosition();
    // ...and the output members are then READ BACK here and on the next
    // line (vout.w_pos), and again for gl_Position (vout.c_pos).
    vout.c_pos = (uCameraMatrix * vec4(vout.w_pos, 1.0)).xyz;
    vec4 w_pos = vec4(vout.w_pos, 1.0);

    // The two blocks below are alternatives that store the same values; the
    // fps figures are the author's measurements for each one.

    // 20 fps
    for(int i = 0; i < 4; ++i) {
        vout.shadowCoord[i] = uShadowCP[i] * w_pos;
    }

    // 50 fps
    vout.shadowCoord[0] = uShadowCP[0] * w_pos;
    vout.shadowCoord[1] = uShadowCP[1] * w_pos;
    vout.shadowCoord[2] = uShadowCP[2] * w_pos;
    vout.shadowCoord[3] = uShadowCP[3] * w_pos;

    gl_Position = uProjectionMatrix * vec4(vout.c_pos, 1.0);
}

Fragment Shader:

#version 330

// Minimal fragment shader paired with the vertex shader of this answer.

// Input block with the same members as the vertex shader's `out VertexData`
// block. None of its members are read in main().
in VertexData {
    vec3 c_pos, w_pos;
    vec4 shadowCoord[4];
} vin;

out vec4 frag_color;

void main() {
    // Constant output: every component is 1.0.
    frag_color = vec4(1.0);
}

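And the funny thing is that, with only a minimal modification of the vertex shader, both variants can be made to run at 50 fps. The main function should be changed to this: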

// Replacement for the vertex shader's main(): the positions are kept in the
// local variables w_pos and c_pos, and the members of vout are only ever
// written, never read.
void main() {
    vec4 w_pos = vec4(CountPosition(), 1.0);
    vec4 c_pos = uCameraMatrix * w_pos;

    // Write-only stores to the output block.
    vout.w_pos = vec3(w_pos);
    vout.c_pos = vec3(c_pos);

    // The two blocks below are alternatives that store the same values; the
    // author reports the same frame rate for both with this main().

    // 50 fps
    for(int i = 0; i < 4; ++i) {
        vout.shadowCoord[i] = uShadowCP[i] * w_pos;
    }

    // 50 fps
    vout.shadowCoord[0] = uShadowCP[0] * w_pos;
    vout.shadowCoord[1] = uShadowCP[1] * w_pos;
    vout.shadowCoord[2] = uShadowCP[2] * w_pos;
    vout.shadowCoord[3] = uShadowCP[3] * w_pos;

    // Uses the local c_pos instead of reading vout.c_pos back.
    gl_Position = uProjectionMatrix * c_pos;
}

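The difference is that the first version reads back from the shader's out variables (vout.w_pos and vout.c_pos), while the second one keeps those values in temporary variables and only ever writes to the out variables.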

Conclusion:

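Reading a shader's out variable is often used as an optimization to get by with one less temporary variable, or at least I have seen it in a lot of places on the internet. Despite that, reading an out variable may actually be an invalid OpenGL operation and may put the GL into an undefined state, in which seemingly unrelated changes to the code can trigger bad behavior.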

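The best part is that the GLSL 330 specification does not say anything about reading from an out variable that was previously written to. Probably because it is simply not something one should be doing.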


P.S.

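Also note that the second example from the question may look totally different, but it behaves exactly the same way in this small snippet: if the out variables are read, it gets quite slow with i < min(uNumUsedShadowMaps, 4) as the for loop condition; however, if the out variables are only written, it makes no difference in performance, and the i < min(uNumUsedShadowMaps, 4) version runs at 50 fps too.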

+2

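My guess is that min(random_uniform, 4) is re-evaluated on every iteration of the loop. That is cheap on a CPU, but on the gpu it may not be. Also, the compiler cannot know that min will return 4 here, so it cannot unroll the loop 4 times the way it can when the bound is the constant 4.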

Try this:

 // Evaluate min(random_uniform, 4) once, before the loop, and use the stored
 // result as the loop bound instead of calling min() in the loop condition.
 int check = min(random_uniform,4);
 for(int i = 0; i < check; ++i) {
     vout.shadowCoord[i] = vec4(1.0);
 }

This way min(random_uniform, 4) is evaluated only once, instead of up to 4 times.

I also don't know what random_uniform is. Is it a constant? Does it change?

0

All Articles