gallivm: clean up passing derivatives around
authorRoland Scheidegger <sroland@vmware.com>
Sat, 9 Mar 2013 00:46:33 +0000 (01:46 +0100)
committerRoland Scheidegger <sroland@vmware.com>
Mon, 11 Mar 2013 23:24:22 +0000 (00:24 +0100)
Previously, the derivatives were calculated and passed in a packed form
to the sample code (this was for implicit derivatives; explicit derivatives
were packed into the same format).
There are several reasons why this wasn't such a good idea:
1) the derivatives may not even be needed (not as bad as it sounds, since
LLVM will just throw away the calculations needed for them, but still)
2) the special packing format really shouldn't be part of the sampler
interface
3) depending on what the sample code actually does, the derivatives will
be processed differently, hence there is no "ideal" packing. For cube
maps with explicit derivatives (which we don't do yet) for instance the
packing looked downright useless, and for non-isotropic filtering we'd
need different calculations too.

So, instead just pass the derivatives as-is (for explicit derivatives),
or let the rho-calculating sample code calculate them itself. This still
does exactly the same packing for implicit derivatives for now, though
explicit ones are handled in a more straightforward manner. Quick
estimates show performance should be quite similar, but the new code is
much easier to follow and also keeps the rho calculation per-pixel until
the end, which we eventually need for spec compliance anyway.

No piglit changes.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_quad.c
src/gallium/auxiliary/gallivm/lp_bld_sample.c
src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 8a0efed655f2fd7ef8a75dcf7645c3d5e1cb35d1..1955add88835179cd3eb855817d53c8a2b1002ac 100644 (file)
@@ -79,14 +79,9 @@ lp_build_ddy(struct lp_build_context *bld,
 }
 
 /*
- * To be able to handle multiple quads at once in texture sampling and
- * do lod calculations per quad, it is necessary to get the per-quad
- * derivatives into the lp_build_rho function.
- * For 8-wide vectors the packed derivative values for 3 coords would
- * look like this, this scales to a arbitrary (multiple of 4) vector size:
- * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ * Helper for building packed ddx/ddy vector for one coord (scalar per quad
+ * values). The vector will look like this (8-wide):
  * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
- * The second vector will be unused for 1d and 2d textures.
  */
 LLVMValueRef
 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
@@ -121,6 +116,11 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
 }
 
 
+/*
+ * Helper for building packed ddx/ddy vector for two coords (scalar per quad
+ * values). The vector will look like this (8-wide):
+ * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ */
 LLVMValueRef
 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
                                  LLVMValueRef a, LLVMValueRef b)
index ef0631c684ae0f6c6f9fea05bb7edb6514c44e6b..fc8bae70152bc2bdd257262392222cba047988f6 100644 (file)
@@ -46,6 +46,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_pack.h"
+#include "lp_bld_quad.h"
 
 
 /*
@@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
 static LLVMValueRef
 lp_build_rho(struct lp_build_sample_context *bld,
              unsigned texture_unit,
+             LLVMValueRef s,
+             LLVMValueRef t,
+             LLVMValueRef r,
              const struct lp_derivatives *derivs)
 {
    struct gallivm_state *gallivm = bld->gallivm;
@@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
    struct lp_build_context *float_bld = &bld->float_bld;
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
-   const LLVMValueRef *ddx_ddy = derivs->ddx_ddy;
    const unsigned dims = bld->dims;
+   LLVMValueRef ddx_ddy[2];
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
@@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    LLVMValueRef rho_xvec, rho_yvec;
 
-   abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
-   if (dims > 2) {
-      abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
-   }
-   else {
-      abs_ddx_ddy[1] = NULL;
-   }
-
-   if (dims == 1) {
-      static const unsigned char swizzle1[] = {
-         0, LP_BLD_SWIZZLE_DONTCARE,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      static const unsigned char swizzle2[] = {
-         1, LP_BLD_SWIZZLE_DONTCARE,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
-   }
-   else if (dims == 2) {
-      static const unsigned char swizzle1[] = {
-         0, 2,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      static const unsigned char swizzle2[] = {
-         1, 3,
-         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-      };
-      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
-   }
-   else {
-      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
-      assert(dims == 3);
-      for (i = 0; i < num_quads; i++) {
-         shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
-         shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
-         shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
-         shuffles1[4*i + 3] = i32undef;
-         shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
-         shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
-         shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
-         shuffles2[4*i + 3] = i32undef;
-      }
-      rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
-                                        LLVMConstVector(shuffles1, length), "");
-      rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
-                                        LLVMConstVector(shuffles2, length), "");
-   }
-
-   rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+   /* Note that all simplified calculations will only work for isotropic filtering */
 
    first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                  bld->gallivm, texture_unit);
@@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld,
    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
    float_size = lp_build_int_to_float(float_size_bld, int_size);
 
-   if (bld->coord_type.length > 4) {
-      /* expand size to each quad */
+   /* XXX ignoring explicit derivs for cube maps for now */
+   if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
+      LLVMValueRef ddmax[3];
+      for (i = 0; i < dims; i++) {
+         LLVMValueRef ddx, ddy;
+         LLVMValueRef floatdim;
+         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+         ddx = lp_build_abs(coord_bld, derivs->ddx[i]);
+         ddy = lp_build_abs(coord_bld, derivs->ddy[i]);
+         ddmax[i] = lp_build_max(coord_bld, ddx, ddy);
+         floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
+                                               coord_bld->type, float_size, indexi);
+         ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
+      }
+      rho_vec = ddmax[0];
       if (dims > 1) {
-         /* could use some broadcast_vector helper for this? */
-         int num_quads = bld->coord_type.length / 4;
-         LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
-         for (i = 0; i < num_quads; i++) {
-            src[i] = float_size;
+         rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]);
+         if (dims > 2) {
+            rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]);
+         }
+      }
+      /*
+       * rho_vec now still contains per-pixel rho, convert to scalar per quad
+       * since we can't handle per-pixel rho/lod from now on (TODO).
+       */
+      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+                                      perquadf_bld->type, rho_vec, 0);
+   }
+   else {
+      /*
+       * This looks all a bit complex, but it's not that bad
+       * (the shuffle code makes it look worse than it is).
+       * Still, might not be ideal for all cases.
+       */
+      if (dims < 2) {
+         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
+      }
+      else if (dims >= 2) {
+         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
+                                                       s, t);
+         if (dims > 2) {
+            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
          }
-         float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+      }
+
+      abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
+      if (dims > 2) {
+         abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
       }
       else {
-         float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+         abs_ddx_ddy[1] = NULL;
       }
-      rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
 
-      if (dims <= 1) {
-         rho = rho_vec;
+      if (dims == 1) {
+         static const unsigned char swizzle1[] = {
+            0, LP_BLD_SWIZZLE_DONTCARE,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         static const unsigned char swizzle2[] = {
+            1, LP_BLD_SWIZZLE_DONTCARE,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+      }
+      else if (dims == 2) {
+         static const unsigned char swizzle1[] = {
+            0, 2,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         static const unsigned char swizzle2[] = {
+            1, 3,
+            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+         };
+         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
       }
       else {
-         if (dims >= 2) {
-            static const unsigned char swizzle1[] = {
-               0, LP_BLD_SWIZZLE_DONTCARE,
-               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-            };
-            static const unsigned char swizzle2[] = {
-               1, LP_BLD_SWIZZLE_DONTCARE,
-               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
-            };
-            LLVMValueRef rho_s, rho_t, rho_r;
-
-            rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
-            rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
-
-            rho = lp_build_max(coord_bld, rho_s, rho_t);
-
-            if (dims >= 3) {
-               static const unsigned char swizzle3[] = {
-                  2, LP_BLD_SWIZZLE_DONTCARE,
+         LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
+         LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
+         assert(dims == 3);
+         for (i = 0; i < num_quads; i++) {
+            shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
+            shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
+            shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
+            shuffles1[4*i + 3] = i32undef;
+            shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
+            shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
+            shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
+            shuffles2[4*i + 3] = i32undef;
+         }
+         rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+                                           LLVMConstVector(shuffles1, length), "");
+         rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+                                           LLVMConstVector(shuffles2, length), "");
+      }
+
+      rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+
+      if (bld->coord_type.length > 4) {
+         /* expand size to each quad */
+         if (dims > 1) {
+            /* could use some broadcast_vector helper for this? */
+            int num_quads = bld->coord_type.length / 4;
+            LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
+            for (i = 0; i < num_quads; i++) {
+               src[i] = float_size;
+            }
+            float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+         }
+         else {
+            float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+         }
+         rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
+
+         if (dims <= 1) {
+            rho = rho_vec;
+         }
+         else {
+            if (dims >= 2) {
+               static const unsigned char swizzle1[] = {
+                  0, LP_BLD_SWIZZLE_DONTCARE,
                   LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
                };
-               rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
-               rho = lp_build_max(coord_bld, rho, rho_r);
+               static const unsigned char swizzle2[] = {
+                  1, LP_BLD_SWIZZLE_DONTCARE,
+                  LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+               };
+               LLVMValueRef rho_s, rho_t, rho_r;
+
+               rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
+               rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
+
+               rho = lp_build_max(coord_bld, rho_s, rho_t);
+
+               if (dims >= 3) {
+                  static const unsigned char swizzle3[] = {
+                     2, LP_BLD_SWIZZLE_DONTCARE,
+                     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+                  };
+                  rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
+                  rho = lp_build_max(coord_bld, rho, rho_r);
+               }
             }
          }
-      }
-      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                      perquadf_bld->type, rho, 0);
-   }
-   else {
-      if (dims <= 1) {
-         rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
-      }
-      rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
-
-      if (dims <= 1) {
-         rho = rho_vec;
+         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+                                         perquadf_bld->type, rho, 0);
       }
       else {
-         if (dims >= 2) {
-            LLVMValueRef rho_s, rho_t, rho_r;
+         if (dims <= 1) {
+            rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+         }
+         rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+         if (dims <= 1) {
+            rho = rho_vec;
+         }
+         else {
+            if (dims >= 2) {
+               LLVMValueRef rho_s, rho_t, rho_r;
 
-            rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
-            rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
+               rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+               rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
 
-            rho = lp_build_max(float_bld, rho_s, rho_t);
+               rho = lp_build_max(float_bld, rho_s, rho_t);
 
-            if (dims >= 3) {
-               rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
-               rho = lp_build_max(float_bld, rho, rho_r);
+               if (dims >= 3) {
+                  rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
+                  rho = lp_build_max(float_bld, rho, rho_r);
+               }
             }
          }
       }
@@ -511,6 +563,9 @@ void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned texture_unit,
                       unsigned sampler_unit,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
                       const struct lp_derivatives *derivs,
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
@@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
       else {
          LLVMValueRef rho;
 
-         rho = lp_build_rho(bld, texture_unit, derivs);
+         rho = lp_build_rho(bld, texture_unit, s, t, r, derivs);
 
          /*
           * Compute lod = log2(rho)
index 63064550ee66396f198d3d24d0b49ba771b478e1..1abe0ca414ebd904299dc3ab449e1ec0717e4356 100644 (file)
@@ -56,7 +56,8 @@ struct lp_build_context;
  */
 struct lp_derivatives
 {
-   LLVMValueRef ddx_ddy[2];
+   LLVMValueRef ddx[3];
+   LLVMValueRef ddy[3];
 };
 
 
@@ -366,6 +367,9 @@ void
 lp_build_lod_selector(struct lp_build_sample_context *bld,
                       unsigned texture_index,
                       unsigned sampler_index,
+                      LLVMValueRef s,
+                      LLVMValueRef t,
+                      LLVMValueRef r,
                       const struct lp_derivatives *derivs,
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
index 8aa41662d67dec6c47de46251baaf0eb195d1769..cdd910fabcfdf7808872ef8fcf9678d43a625eef 100644 (file)
@@ -1077,7 +1077,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                        LLVMValueRef *s,
                        LLVMValueRef *t,
                        LLVMValueRef *r,
-                       const struct lp_derivatives *derivs,
+                       const struct lp_derivatives *derivs, /* optional */
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
                        LLVMValueRef *lod_ipart,
@@ -1090,7 +1090,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
    const unsigned target = bld->static_texture_state->target;
    LLVMValueRef first_level;
-   struct lp_derivatives face_derivs;
 
    /*
    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
@@ -1107,11 +1106,6 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
       *t = face_t; /* vec */
       /* use 'r' to indicate cube face */
       *r = face; /* vec */
-
-      /* recompute ddx, ddy using the new (s,t) face texcoords */
-      face_derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t);
-      face_derivs.ddx_ddy[1] = NULL;
-      derivs = &face_derivs;
    }
    else if (target == PIPE_TEXTURE_1D_ARRAY) {
       *r = lp_build_iround(&bld->coord_bld, *t);
@@ -1131,6 +1125,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
        * distinguish between minification/magnification with one mipmap level.
        */
       lp_build_lod_selector(bld, texture_index, sampler_index,
+                            *s, *t, *r,
                             derivs, lod_bias, explicit_lod,
                             mip_filter,
                             lod_ipart, lod_fpart);
@@ -1479,7 +1474,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                     unsigned sampler_index,
                     const LLVMValueRef *coords,
                     const LLVMValueRef *offsets,
-                    const struct lp_derivatives *derivs,
+                    const struct lp_derivatives *derivs, /* optional */
                     LLVMValueRef lod_bias, /* optional */
                     LLVMValueRef explicit_lod, /* optional */
                     LLVMValueRef texel_out[4])
index 9a30cc80296f0d2454b0cbb21f353f240c727215..98bce0eb26910951f5a27ae3bac18455617558c8 100644 (file)
@@ -363,7 +363,7 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
    unsigned target;
    unsigned unit;
    LLVMValueRef coords;
-   struct lp_derivatives derivs;
+   struct lp_derivatives derivs = { {NULL}, {NULL} };
 
    if (!bld->sampler) {
       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
@@ -374,22 +374,15 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
 
    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
 
-   if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
-      lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
+   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+      /* probably not going to work */
+      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
+      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
       unit = inst->Src[3].Register.Index;
-   }  else {
-#if 0
-      ddx = lp_build_ddx( &bld->bld_base.base, coords );
-      ddy = lp_build_ddy( &bld->bld_base.base, coords );
-#else
-      /* TODO */
-      derivs.ddx_ddy[0] = bld->bld_base.base.one;
-      derivs.ddx_ddy[1] = bld->bld_base.base.one;
-#endif
+   }
+   else {
       unit = inst->Src[1].Register.Index;
    }
-
    return bld->sampler->emit_fetch_texel(bld->sampler,
                                          &bld->bld_base.base,
                                          target, unit,
index 69957fe7bb982bb1695c7e90f7dfc5226412e9d9..9fe87c40b630609530c5ca6bed58d03e490389a3 100644 (file)
@@ -1164,14 +1164,13 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           enum lp_build_tex_modifier modifier,
           LLVMValueRef *texel)
 {
-   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
-   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    unsigned unit;
    LLVMValueRef lod_bias, explicit_lod;
    LLVMValueRef oow = NULL;
    LLVMValueRef coords[4];
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
+   struct lp_derivatives *deriv_ptr = NULL;
    unsigned num_coords;
    unsigned dims;
    unsigned i;
@@ -1184,9 +1183,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       return;
    }
 
-   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
-   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
-
    switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
       num_coords = 1;
@@ -1259,58 +1255,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef ddxdyonec[3];
-      unsigned length = bld->bld_base.base.type.length;
-      unsigned num_quads = length / 4;
       unsigned dim;
-      unsigned quad;
-
       for (dim = 0; dim < dims; ++dim) {
-         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
-         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 2] = i32undef;
-            shuffles[4*quad + 3] = i32undef;
-         }
-         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
-                                               LLVMConstVector(shuffles, length), "");
-      }
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = ddxdyonec[0];
-      }
-      else if (dims >= 2) {
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
-            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
-         }
-         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
-                                                  LLVMConstVector(shuffles, length), "");
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = ddxdyonec[2];
-         }
+         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
+         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
       }
+      deriv_ptr = &derivs;
       unit = inst->Src[3].Register.Index;
    }  else {
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
-      }
-      else if (dims >= 2) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
-                                                            coords[0], coords[1]);
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
-         }
-      }
       unit = inst->Src[1].Register.Index;
    }
 
@@ -1329,7 +1281,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                   unit, unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  deriv_ptr,
                                   lod_bias, explicit_lod,
                                   texel);
 }
@@ -1341,13 +1293,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
             boolean compare,
             LLVMValueRef *texel)
 {
-   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    unsigned texture_unit, sampler_unit;
    LLVMValueRef lod_bias, explicit_lod;
    LLVMValueRef coords[4];
    LLVMValueRef offsets[3] = { NULL };
    struct lp_derivatives derivs;
+   struct lp_derivatives *deriv_ptr = NULL;
    unsigned num_coords, dims;
    unsigned i;
 
@@ -1366,9 +1318,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    texture_unit = inst->Src[1].Register.Index;
    sampler_unit = inst->Src[2].Register.Index;
 
-   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
-   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
-
    /*
     * Note inst->Texture.Texture will contain the number of offsets,
     * however the target information is NOT there and comes from the
@@ -1449,57 +1398,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-      LLVMValueRef ddxdyonec[3];
-      unsigned length = bld->bld_base.base.type.length;
-      unsigned num_quads = length / 4;
       unsigned dim;
-      unsigned quad;
-
       for (dim = 0; dim < dims; ++dim) {
-         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
-         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 2] = i32undef;
-            shuffles[4*quad + 3] = i32undef;
-         }
-         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
-                                               LLVMConstVector(shuffles, length), "");
-      }
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = ddxdyonec[0];
-      }
-      else if (dims >= 2) {
-         for (quad = 0; quad < num_quads; ++quad) {
-            unsigned s1 = 4*quad;
-            unsigned s2 = 4*quad + length;
-            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
-            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
-            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
-            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
-         }
-         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
-                                                  LLVMConstVector(shuffles, length), "");
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = ddxdyonec[2];
-         }
-      }
-   }  else {
-      if (dims == 1) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
-      }
-      else if (dims >= 2) {
-         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
-                                                            coords[0], coords[1]);
-         if (dims == 3) {
-            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
-         }
+         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
+         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
       }
+      deriv_ptr = &derivs;
    }
 
    /* some advanced gather instructions (txgo) would require 4 offsets */
@@ -1517,7 +1421,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
                                   texture_unit, sampler_unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  deriv_ptr,
                                   lod_bias, explicit_lod,
                                   texel);
 }
@@ -1533,7 +1437,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    LLVMValueRef explicit_lod = NULL;
    LLVMValueRef coords[3];
    LLVMValueRef offsets[3] = { NULL };
-   struct lp_derivatives derivs;
    unsigned num_coords;
    unsigned dims;
    unsigned i;
@@ -1548,9 +1451,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
 
    unit = inst->Src[1].Register.Index;
 
-   derivs.ddx_ddy[0] = coord_undef;
-   derivs.ddx_ddy[1] = coord_undef;
-
    if (is_samplei) {
       target = bld->sv[unit].Resource;
    }
@@ -1612,7 +1512,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                                   unit, unit,
                                   coords,
                                   offsets,
-                                  &derivs,
+                                  NULL,
                                   NULL, explicit_lod,
                                   texel);
 }