draw: fix draw_llvm_variant_key struct padding to avoid recompiles
author Roland Scheidegger <sroland@vmware.com>
Tue, 29 Jan 2013 16:39:09 +0000 (08:39 -0800)
committer Roland Scheidegger <sroland@vmware.com>
Tue, 29 Jan 2013 16:40:52 +0000 (08:40 -0800)
The struct padding got broken by c789b981b244333cfc903bcd1e2fefc010500013.
This caused a serious performance regression because part of the key was
uninitialized and hence the shader was always recompiled (at least on release
builds...).
While here also fix key size calculation when the number of samplers
and the number of sampler views are different.

v2: add comment

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/draw/draw_llvm.h
src/gallium/auxiliary/draw/draw_vs_llvm.c

index afb10a6a33acd995d02a0af37621af20823daded..dc83f805a0ff341968f6ddd81caf4c283806769e 100644 (file)
@@ -1378,7 +1378,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
    key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
-   key->pad = 0;
+   key->pad1 = 0;
+   key->pad2 = 0;
 
    /* All variants of this shader will have the same value for
     * nr_samplers.  Not yet trying to compact away holes in the
index a6648573aa523463eb50c30bc6a60e706dcbda38..17ca3047594b9feff3f0ec674c53508d34700f56 100644 (file)
@@ -206,8 +206,13 @@ struct draw_llvm_variant_key
    unsigned clip_halfz:1;
    unsigned bypass_viewport:1;
    unsigned need_edgeflags:1;
+   /*
+    * it is important there are no holes in this struct
+    * (and all padding gets zeroed).
+    */
+   unsigned pad1:1;
    unsigned ucp_enable:PIPE_MAX_CLIP_PLANES;
-   unsigned pad:33-PIPE_MAX_CLIP_PLANES;
+   unsigned pad2:32-PIPE_MAX_CLIP_PLANES;
 
    /* Variable number of vertex elements:
     */
index 3e46f8c2cf3d699fa9e275f2be0487c9efbc115c..ac3999efc6826ffbaf7591387b91c30bcd70db38 100644 (file)
@@ -100,8 +100,9 @@ draw_create_vs_llvm(struct draw_context *draw,
 
    vs->variant_key_size = 
       draw_llvm_variant_key_size(
-        vs->base.info.file_max[TGSI_FILE_INPUT]+1,
-        vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+         vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+         MAX2(vs->base.info.file_max[TGSI_FILE_SAMPLER]+1,
+              vs->base.info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
 
    vs->base.state.stream_output = state->stream_output;
    vs->base.draw = draw;