gallivm: implement better control of per-quad/per-element/scalar lod
authorRoland Scheidegger <sroland@vmware.com>
Mon, 19 Aug 2013 19:12:59 +0000 (21:12 +0200)
committerRoland Scheidegger <sroland@vmware.com>
Tue, 20 Aug 2013 21:00:24 +0000 (23:00 +0200)
There's a new debug value (no_quad_lod) used to disable per-quad lod
optimizations in fragment shaders (it has no effect for vs/gs, which never
use per-quad lod as the results are typically just too wrong). Also try to
detect if a supplied lod value is really a scalar (i.e. it comes from the
immediate or constant file), in which case the sampler code can use this to
stay on the per-quad lod path (in fact, for explicit lod it could simplify
even further and use the same lod for both quads in the avx case, but this
is not implemented yet).
Still need to actually implement per-element lod bias (and derivatives),
and need to handle per-element lod in size queries.

v2: fix comments, prettify.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_llvm_sample.c
src/gallium/auxiliary/gallivm/lp_bld_debug.h
src/gallium/auxiliary/gallivm/lp_bld_init.c
src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_tex_sample.c

index 97b025565c3106c209bb6fbc5e626912af0404f9..a6341fae3cdc36daf36bedb99b758a9dd8d67369 100644 (file)
@@ -238,7 +238,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                                        const struct lp_derivatives *derivs,
                                        LLVMValueRef lod_bias, /* optional */
                                        LLVMValueRef explicit_lod, /* optional */
-                                       boolean scalar_lod,
+                                       enum lp_sampler_lod_property lod_property,
                                        LLVMValueRef *texel)
 {
    struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base;
@@ -257,7 +257,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                        coords,
                        offsets,
                        derivs,
-                       lod_bias, explicit_lod, scalar_lod,
+                       lod_bias, explicit_lod, lod_property,
                        texel);
 }
 
@@ -272,7 +272,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
                                       unsigned texture_unit,
                                       unsigned target,
                                       boolean is_sviewinfo,
-                                      boolean scalar_lod,
+                                      enum lp_sampler_lod_property lod_property,
                                       LLVMValueRef explicit_lod, /* optional */
                                       LLVMValueRef *sizes_out)
 {
@@ -287,7 +287,7 @@ draw_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
                            texture_unit,
                            target,
                            is_sviewinfo,
-                           scalar_lod,
+                           lod_property,
                            explicit_lod,
                            sizes_out);
 }
index 4f38edf11f857d21fe1215504709debe17c97948..76c39af65be0f5b428aaae672ae5237b7c04dbe3 100644 (file)
@@ -43,7 +43,8 @@
 #define GALLIVM_DEBUG_PERF          (1 << 4)
 #define GALLIVM_DEBUG_NO_BRILINEAR  (1 << 5)
 #define GALLIVM_DEBUG_NO_RHO_APPROX (1 << 6)
-#define GALLIVM_DEBUG_GC            (1 << 7)
+#define GALLIVM_DEBUG_NO_QUAD_LOD   (1 << 7)
+#define GALLIVM_DEBUG_GC            (1 << 8)
 
 
 #ifdef __cplusplus
index e4cc0582a7010e86fa432c092b62f34922cdf51c..61eadb838dc601a54174857580b5b9779467259b 100644 (file)
@@ -80,6 +80,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
    { "perf",   GALLIVM_DEBUG_PERF, NULL },
    { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL },
    { "no_rho_approx", GALLIVM_DEBUG_NO_RHO_APPROX, NULL },
+   { "no_quad_lod", GALLIVM_DEBUG_NO_QUAD_LOD, NULL },
    { "gc",     GALLIVM_DEBUG_GC, NULL },
    DEBUG_NAMED_VALUE_END
 };
index 6d8fe8890d6f2e5f0cd414ba187089ff995987c6..6d173770da775ee6867af888b76bbe4d0d6be4f8 100644 (file)
@@ -61,6 +61,13 @@ struct lp_derivatives
 };
 
 
+enum lp_sampler_lod_property { /* how the lod handed to the sampler code varies across a vector */
+   LP_SAMPLER_LOD_SCALAR,      /* lod treated as a scalar, e.g. operand from constant/immediate file */
+   LP_SAMPLER_LOD_PER_ELEMENT, /* lod may differ per element; with explicit lod one lod per element is used */
+   LP_SAMPLER_LOD_PER_QUAD     /* lod shared per quad; only selected for fragment shaders */
+};
+
+
 /**
  * Texture static state.
  *
@@ -476,7 +483,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                     const struct lp_derivatives *derivs,
                     LLVMValueRef lod_bias,
                     LLVMValueRef explicit_lod,
-                    boolean scalar_lod,
+                    enum lp_sampler_lod_property lod_property,
                     LLVMValueRef texel_out[4]);
 
 
@@ -497,7 +504,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
                         unsigned texture_unit,
                         unsigned target,
                         boolean is_sviewinfo,
-                        boolean scalar_lod,
+                        enum lp_sampler_lod_property lod_property,
                         LLVMValueRef explicit_lod,
                         LLVMValueRef *sizes_out);
 
index 20a08cbb7a0d7738dd0582455f96ba97d395c4a1..2ffe21f97d69d7657c05c6028018f984ea31bf0d 100644 (file)
@@ -1646,7 +1646,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                     const struct lp_derivatives *derivs, /* optional */
                     LLVMValueRef lod_bias, /* optional */
                     LLVMValueRef explicit_lod, /* optional */
-                    boolean scalar_lod,
+                    enum lp_sampler_lod_property lod_property,
                     LLVMValueRef texel_out[4])
 {
    unsigned target = static_texture_state->target;
@@ -1733,7 +1733,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
     * There are other situations where at least the multiple int lods could be
     * avoided like min and max lod being equal.
     */
-   if (explicit_lod && !scalar_lod &&
+   if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
        ((is_fetch && target != PIPE_BUFFER) ||
         (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
       bld.num_lods = type.length;
@@ -1925,7 +1925,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld4.levelf_type.length = 1;
          bld4.leveli_type = lp_int_type(bld4.levelf_type);
 
-         if (explicit_lod && !scalar_lod &&
+         if (explicit_lod && lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
              ((is_fetch && target != PIPE_BUFFER) ||
               (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
             bld4.num_lods = type4.length;
@@ -2046,7 +2046,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
                         unsigned texture_unit,
                         unsigned target,
                         boolean is_sviewinfo,
-                        boolean scalar_lod,
+                        enum lp_sampler_lod_property lod_property,
                         LLVMValueRef explicit_lod,
                         LLVMValueRef *sizes_out)
 {
index 9d27f5fa94d91834c69c16dc3945ff9af3035716..522302ef4f7a432d813964e1f687ddc37a011fbc 100644 (file)
@@ -40,6 +40,7 @@
 #include "gallivm/lp_bld.h"
 #include "gallivm/lp_bld_tgsi_action.h"
 #include "gallivm/lp_bld_limits.h"
+#include "gallivm/lp_bld_sample.h"
 #include "lp_bld_type.h"
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
@@ -184,7 +185,7 @@ struct lp_build_sampler_soa
                         const struct lp_derivatives *derivs,
                         LLVMValueRef lod_bias, /* optional */
                         LLVMValueRef explicit_lod, /* optional */
-                        boolean scalar_lod,
+                        enum lp_sampler_lod_property,
                         LLVMValueRef *texel);
 
    void
@@ -194,7 +195,7 @@ struct lp_build_sampler_soa
                        unsigned unit,
                        unsigned target,
                        boolean need_nr_mips,
-                       boolean scalar_lod,
+                       enum lp_sampler_lod_property,
                        LLVMValueRef explicit_lod, /* optional */
                        LLVMValueRef *sizes_out);
 };
index cab53dfd35df31a22750e3bdb0788217231a1a22..4c6b6ec5ab62937aa205d57d384034d90d79f62a 100644 (file)
@@ -1601,6 +1601,56 @@ tgsi_to_pipe_tex_target(unsigned tgsi_target)
    }
 }
 
+
+static enum lp_sampler_lod_property
+lp_build_lod_property(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op)
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   enum lp_sampler_lod_property lod_property;
+
+   /*
+    * Pick the lod property handed to the sampler code for the lod held in
+    * source operand src_op of inst: scalar if that operand comes from the
+    * constant or immediate file, otherwise per-quad for fragment shaders
+    * (per-element if GALLIVM_DEBUG_NO_QUAD_LOD is set) and per-element
+    * for every other shader type.
+    * NOTE(review): only inst->Src[src_op] is inspected, so callers need to
+    * pass the operand the lod is actually fetched from - confirm at the
+    * call sites.
+    *
+    * Not much more we can do here. We could try catching inputs declared
+    * with constant interpolation but not sure it's worth it, since for
+    * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
+    * the coords (so it could only work for SAMPLE/TXQ/SVIEWINFO), just
+    * like the constant/immediate recognition below.
+    * What seems to be of more value would be to recognize temps holding
+    * broadcasted scalars, but there is no way we can do it.
+    * Tried asking llvm but without any success (using LLVMIsConstant
+    * even though this isn't exactly what we'd need); even something as
+    * simple as
+    * IMM[0] UINT32 (0,-1,0,0)
+    * MOV TEMP[0] IMM[0].yyyy
+    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
+    * doesn't work.
+    * This means there's ZERO chance this will ever catch a scalar lod
+    * with traditional tex opcodes as well as texel fetches, since the lod
+    * comes from the same reg as the coords (except maybe some test shaders
+    * using constant coords).
+    * There's at least hope for sample opcodes as well as size queries.
+    */
+   if (reg->Register.File == TGSI_FILE_CONSTANT ||
+       reg->Register.File == TGSI_FILE_IMMEDIATE) {
+      lod_property = LP_SAMPLER_LOD_SCALAR;
+   }
+   else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
+      if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+      }
+      else {
+         lod_property = LP_SAMPLER_LOD_PER_QUAD;
+      }
+   }
+   else {
+      /* not a fragment shader: never use per-quad lod, the results are just too wrong. */
+      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+   }
+   return lod_property;
+}
+
+
 /**
  * High-level instruction translators.
  */
@@ -1618,7 +1668,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
    struct lp_derivatives *deriv_ptr = NULL;
-   boolean scalar_lod;
+   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
    unsigned num_derivs, num_offsets, i;
    unsigned shadow_coord = 0;
    unsigned layer_coord = 0;
@@ -1690,13 +1740,18 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    }
 
    /* Note lod and especially projected are illegal in a LOT of cases */
-   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
-      lod_bias = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
-      explicit_lod = NULL;
-   }
-   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
-      lod_bias = NULL;
-      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
+   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+      LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
+      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
+         lod_bias = lod;
+         explicit_lod = NULL;
+      }
+      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+         lod_bias = NULL;
+         explicit_lod = lod;
+      }
+      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
    }
    else {
       lod_bias = NULL;
@@ -1738,6 +1793,21 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       }
       deriv_ptr = &derivs;
       unit = inst->Src[3].Register.Index;
+      /*
+       * could also check all src regs if constant but I doubt such
+       * cases exist in practice.
+       */
+      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
+         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+         }
+         else {
+            lod_property = LP_SAMPLER_LOD_PER_QUAD;
+         }
+      }
+      else {
+         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+      }
    } else {
       unit = inst->Src[1].Register.Index;
    }
@@ -1750,9 +1820,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       }
    }
 
-   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
    bld->sampler->emit_fetch_texel(bld->sampler,
                                   bld->bld_base.base.gallivm,
                                   bld->bld_base.base.type,
@@ -1761,7 +1828,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                   coords,
                                   offsets,
                                   deriv_ptr,
-                                  lod_bias, explicit_lod, scalar_lod,
+                                  lod_bias, explicit_lod, lod_property,
                                   texel);
 }
 
@@ -1779,7 +1846,8 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
    struct lp_derivatives *deriv_ptr = NULL;
-   boolean scalar_lod;
+   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
+
    unsigned num_offsets, num_derivs, i;
    unsigned layer_coord = 0;
 
@@ -1841,13 +1909,18 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
       return;
    }
 
-   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
-      lod_bias = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
-      explicit_lod = NULL;
-   }
-   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
-      lod_bias = NULL;
-      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
+   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+      LLVMValueRef lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
+      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
+         lod_bias = lod;
+         explicit_lod = NULL;
+      }
+      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+         lod_bias = NULL;
+         explicit_lod = lod;
+      }
+      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
    }
    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
       lod_bias = NULL;
@@ -1885,6 +1958,21 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
       }
       deriv_ptr = &derivs;
+      /*
+       * could also check all src regs if constant but I doubt such
+       * cases exist in practice.
+       */
+      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
+         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+         }
+         else {
+            lod_property = LP_SAMPLER_LOD_PER_QUAD;
+         }
+      }
+      else {
+         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+      }
    }
 
    /* some advanced gather instructions (txgo) would require 4 offsets */
@@ -1895,10 +1983,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
       }
    }
 
-   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT ||
-                modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO;
-
    bld->sampler->emit_fetch_texel(bld->sampler,
                                   bld->bld_base.base.gallivm,
                                   bld->bld_base.base.type,
@@ -1907,7 +1991,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
                                   coords,
                                   offsets,
                                   deriv_ptr,
-                                  lod_bias, explicit_lod, scalar_lod,
+                                  lod_bias, explicit_lod, lod_property,
                                   texel);
 
    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
@@ -1935,7 +2019,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    LLVMValueRef explicit_lod = NULL;
    LLVMValueRef coords[3];
    LLVMValueRef offsets[3] = { NULL };
-   boolean scalar_lod;
+   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
    unsigned dims, i;
    unsigned layer_coord = 0;
 
@@ -1984,6 +2068,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    /* always have lod except for buffers ? */
    if (target != TGSI_TEXTURE_BUFFER) {
       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
+      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
    }
 
    for (i = 0; i < dims; i++) {
@@ -2002,9 +2087,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
       }
    }
 
-   /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
    bld->sampler->emit_fetch_texel(bld->sampler,
                                   bld->bld_base.base.gallivm,
                                   bld->bld_base.base.type,
@@ -2013,7 +2095,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                                   coords,
                                   offsets,
                                   NULL,
-                                  NULL, explicit_lod, scalar_lod,
+                                  NULL, explicit_lod, lod_property,
                                   texel);
 
    if (is_samplei &&
@@ -2038,7 +2120,7 @@ emit_size_query( struct lp_build_tgsi_soa_context *bld,
                  boolean is_sviewinfo)
 {
    LLVMValueRef explicit_lod;
-   boolean scalar_lod;
+   enum lp_sampler_lod_property lod_property;
    unsigned has_lod;
    unsigned i;
    unsigned unit = inst->Src[1].Register.Index;
@@ -2068,22 +2150,24 @@ emit_size_query( struct lp_build_tgsi_soa_context *bld,
       return;
    }
 
-   if (has_lod)
-      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
-   else
+   if (has_lod) {
+      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
+      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
+   }
+   else {
       explicit_lod = NULL;
+      lod_property = LP_SAMPLER_LOD_SCALAR;
+   }
 
-   pipe_target = tgsi_to_pipe_tex_target(target);
 
-   /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
+   pipe_target = tgsi_to_pipe_tex_target(target);
 
    bld->sampler->emit_size_query(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.int_bld.type,
                                  unit, pipe_target,
                                  is_sviewinfo,
-                                 scalar_lod,
+                                 lod_property,
                                  explicit_lod,
                                  sizes_out);
 }
index 68a4be932231b0b525fe7c760ed75492226d75f6..f0a4a342bc2c1e15ca909aa2f9432de759c5a111 100644 (file)
@@ -244,7 +244,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                                      const struct lp_derivatives *derivs,
                                      LLVMValueRef lod_bias, /* optional */
                                      LLVMValueRef explicit_lod, /* optional */
-                                     boolean scalar_lod,
+                                     enum lp_sampler_lod_property lod_property,
                                      LLVMValueRef *texel)
 {
    struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
@@ -268,7 +268,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                        coords,
                        offsets,
                        derivs,
-                       lod_bias, explicit_lod, scalar_lod,
+                       lod_bias, explicit_lod, lod_property,
                        texel);
 }
 
@@ -282,7 +282,7 @@ lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
                                     unsigned texture_unit,
                                     unsigned target,
                                     boolean is_sviewinfo,
-                                    boolean scalar_lod,
+                                    enum lp_sampler_lod_property lod_property,
                                     LLVMValueRef explicit_lod, /* optional */
                                     LLVMValueRef *sizes_out)
 {
@@ -297,7 +297,7 @@ lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
                            texture_unit,
                            target,
                            is_sviewinfo,
-                           scalar_lod,
+                           lod_property,
                            explicit_lod,
                            sizes_out);
 }