llvmpipe: Per quad interpolation.
authorJosé Fonseca <jfonseca@vmware.com>
Wed, 2 Jun 2010 17:43:35 +0000 (18:43 +0100)
committerJosé Fonseca <jfonseca@vmware.com>
Wed, 2 Jun 2010 17:43:35 +0000 (18:43 +0100)
First interpolate the 4 quads upper left corners, then sub-interpolate
each quad pixel. Do the perspective divide once per quad.

Saves some muls and reciprocates. But doesn't seem to make a
noticeable improvement.

It make the code simpler and more compact, so commiting anyway.

src/gallium/drivers/llvmpipe/lp_bld_interp.c
src/gallium/drivers/llvmpipe/lp_bld_interp.h

index f0a0a0b6cfc6b7c39be208e8858d6f3e4cf1a412..e73733e51c472181a67e488f6b013ceaa5c93d41 100644 (file)
@@ -101,117 +101,141 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 {
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
    LLVMBuilderRef builder = coeff_bld->builder;
+   LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
+   LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
+   LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+   LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+   LLVMValueRef oow = NULL;
    unsigned attrib;
    unsigned chan;
 
-   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+   /* TODO: Use more vector operations */
+
+   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
       const unsigned mask = bld->mask[attrib];
       const unsigned interp = bld->interp[attrib];
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         if(mask & (1 << chan)) {
+      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if (mask & (1 << chan)) {
             LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
-            LLVMValueRef a0 = coeff_bld->undef;
-            LLVMValueRef dadx = coeff_bld->undef;
-            LLVMValueRef dady = coeff_bld->undef;
-
-            switch( interp ) {
+            LLVMValueRef a0 = zero;
+            LLVMValueRef dadx = zero;
+            LLVMValueRef dady = zero;
+            LLVMValueRef dadxy = zero;
+            LLVMValueRef dadq;
+            LLVMValueRef dadq2;
+            LLVMValueRef a;
+
+            switch (interp) {
             case LP_INTERP_PERSPECTIVE:
                /* fall-through */
 
             case LP_INTERP_LINEAR:
-               dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
-               dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
-               dadx = lp_build_broadcast_scalar(coeff_bld, dadx);
-               dady = lp_build_broadcast_scalar(coeff_bld, dady);
-               attrib_name(dadx, attrib, chan, ".dadx");
-               attrib_name(dady, attrib, chan, ".dady");
+               if (attrib == 0 && chan == 0) {
+                  dadxy = dadx = one;
+               }
+               else if (attrib == 0 && chan == 1) {
+                  dadxy = dady = one;
+               }
+               else {
+                  dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+                  dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+                  dadxy = LLVMBuildAdd(builder, dadx, dady, "");
+                  attrib_name(dadx, attrib, chan, ".dadx");
+                  attrib_name(dady, attrib, chan, ".dady");
+                  attrib_name(dadxy, attrib, chan, ".dadxy");
+               }
                /* fall-through */
 
             case LP_INTERP_CONSTANT:
             case LP_INTERP_FACING:
                a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
-               a0 = lp_build_broadcast_scalar(coeff_bld, a0);
                attrib_name(a0, attrib, chan, ".a0");
                break;
 
             case LP_INTERP_POSITION:
                /* Nothing to do as the position coeffs are already setup in slot 0 */
-               break;
+               continue;
 
             default:
                assert(0);
                break;
             }
 
-            bld->a0  [attrib][chan] = a0;
-            bld->dadx[attrib][chan] = dadx;
-            bld->dady[attrib][chan] = dady;
-         }
-      }
-   }
-}
+            /*
+             * dadq = {0, dadx, dady, dadx + dady}
+             */
 
+            dadq = coeff_bld->undef;
+            dadq = LLVMBuildInsertElement(builder, dadq, zero,  i0, "");
+            dadq = LLVMBuildInsertElement(builder, dadq, dadx,  i1, "");
+            dadq = LLVMBuildInsertElement(builder, dadq, dady,  i2, "");
+            dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
 
-/**
- * Emit LLVM code to compute the fragment shader input attribute values.
- * For example, for a color input, we'll compute red, green, blue and alpha
- * values for the four pixels in a quad.
- * Recall that we're operating on 4-element vectors so each arithmetic
- * operation is operating on the four pixels in a quad.
- */
-static void
-attribs_init(struct lp_build_interp_soa_context *bld)
-{
-   struct lp_build_context *coeff_bld = &bld->coeff_bld;
-   LLVMValueRef x = bld->pos[0];
-   LLVMValueRef y = bld->pos[1];
-   LLVMValueRef oow = NULL;
-   unsigned attrib;
-   unsigned chan;
+            /*
+             * dadq2 = 2 * dq
+             */
 
-   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
-      const unsigned mask = bld->mask[attrib];
-      const unsigned interp = bld->interp[attrib];
-      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-         if(mask & (1 << chan)) {
-            if (interp == LP_INTERP_POSITION) {
-               assert(attrib > 0);
-               bld->attribs[attrib][chan] = bld->attribs[0][chan];
-            }
-            else {
-               LLVMValueRef a0   = bld->a0  [attrib][chan];
-               LLVMValueRef dadx = bld->dadx[attrib][chan];
-               LLVMValueRef dady = bld->dady[attrib][chan];
-               LLVMValueRef res;
+            dadq2 = LLVMBuildAdd(builder, dadq, dadq, "");
 
-               res = a0;
+            /*
+             * a = a0 + x * dadx + y * dady
+             */
 
+            if (attrib == 0 && chan == 0) {
+               a = bld->x;
+            }
+            else if (attrib == 0 && chan == 1) {
+               a = bld->y;
+            }
+            else {
+               a = a0;
                if (interp != LP_INTERP_CONSTANT &&
                    interp != LP_INTERP_FACING) {
-                  /* res = res + x * dadx */
-                  res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, x, dadx));
-                  /* res = res + y * dady */
-                  res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, y, dady));
+                  a = LLVMBuildAdd(builder, a,
+                                   LLVMBuildMul(builder, bld->x, dadx, ""),
+                                   "");
+                  a = LLVMBuildAdd(builder, a,
+                                   LLVMBuildMul(builder, bld->y, dady, ""),
+                                   "");
                }
+            }
 
-               /* Keep the value of the attribute before perspective divide
-                * for faster updates.
-                */
-               bld->attribs_pre[attrib][chan] = res;
-
-               if (interp == LP_INTERP_PERSPECTIVE) {
-                  LLVMValueRef w = bld->pos[3];
-                  assert(attrib != 0);
-                  assert(bld->mask[0] & TGSI_WRITEMASK_W);
-                  if(!oow)
-                     oow = lp_build_rcp(coeff_bld, w);
-                  res = lp_build_mul(coeff_bld, res, oow);
-               }
+            /*
+             * a = {a, a, a, a}
+             */
+
+            a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
+
+            /*
+             * Compute the attrib values on the upper-left corner of each quad.
+             */
 
-               attrib_name(res, attrib, chan, "");
+            a = LLVMBuildAdd(builder, a, dadq2, "");
 
-               bld->attribs[attrib][chan] = res;
+            /*
+             * a    *= 1 / w
+             * dadq *= 1 / w
+             */
+
+            if (interp == LP_INTERP_PERSPECTIVE) {
+               LLVMValueRef w = bld->a[0][3];
+               assert(attrib != 0);
+               assert(bld->mask[0] & TGSI_WRITEMASK_W);
+               if (!oow) {
+                  oow = lp_build_rcp(coeff_bld, w);
+                  lp_build_name(oow, "oow");
+               }
+               a = lp_build_mul(coeff_bld, a, oow);
+               dadq = lp_build_mul(coeff_bld, dadq, oow);
             }
+
+            attrib_name(a, attrib, chan, ".a");
+            attrib_name(dadq, attrib, chan, ".dadq");
+
+            bld->a   [attrib][chan] = a;
+            bld->dadq[attrib][chan] = dadq;
          }
       }
    }
@@ -226,7 +250,7 @@ static void
 attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
 {
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
-   LLVMValueRef oow = NULL;
+   LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
    unsigned attrib;
    unsigned chan;
 
@@ -235,50 +259,36 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
    for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
       const unsigned mask = bld->mask[attrib];
       const unsigned interp = bld->interp[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a = coeff_bld->undef;
+            if (interp == LP_INTERP_CONSTANT &&
+                interp == LP_INTERP_FACING) {
+               a = bld->a[attrib][chan];
+            }
+            else if (interp == LP_INTERP_POSITION) {
+               assert(attrib > 0);
+               a = bld->attribs[0][chan];
+            }
+            else {
+               a = bld->a[attrib][chan];
 
-      if (interp != LP_INTERP_CONSTANT &&
-          interp != LP_INTERP_FACING) {
-         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-            if(mask & (1 << chan)) {
-               if (interp == LP_INTERP_POSITION) {
-                  assert(attrib > 0);
-                  bld->attribs[attrib][chan] = bld->attribs[0][chan];
-               }
-               else {
-                  LLVMValueRef dadx = bld->dadx[attrib][chan];
-                  LLVMValueRef dady = bld->dady[attrib][chan];
-                  LLVMValueRef res;
-
-                  res = bld->attribs_pre[attrib][chan];
-
-                  if (quad_index == 1 || quad_index == 3) {
-                     /* top-right or bottom-right quad */
-                     /* build res = res + dadx + dadx */
-                     res = lp_build_add(coeff_bld, res, dadx);
-                     res = lp_build_add(coeff_bld, res, dadx);
-                  }
-
-                  if (quad_index == 2 || quad_index == 3) {
-                     /* bottom-left or bottom-right quad */
-                     /* build res = res + dady + dady */
-                     res = lp_build_add(coeff_bld, res, dady);
-                     res = lp_build_add(coeff_bld, res, dady);
-                  }
-
-                  if (interp == LP_INTERP_PERSPECTIVE) {
-                     LLVMValueRef w = bld->pos[3];
-                     assert(attrib != 0);
-                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
-                     if(!oow)
-                        oow = lp_build_rcp(coeff_bld, w);
-                     res = lp_build_mul(coeff_bld, res, oow);
-                  }
-
-                  attrib_name(res, attrib, chan, "");
-
-                  bld->attribs[attrib][chan] = res;
-               }
+               /*
+                * Broadcast the attribute value for this quad into all elements
+                */
+
+               a = LLVMBuildShuffleVector(coeff_bld->builder,
+                                          a, coeff_bld->undef, shuffle, "");
+
+               /*
+                * Add the derivatives
+                */
+
+               a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);
+
+               attrib_name(a, attrib, chan, "");
             }
+            bld->attribs[attrib][chan] = a;
          }
       }
    }
@@ -296,70 +306,9 @@ pos_init(struct lp_build_interp_soa_context *bld,
          LLVMValueRef y0)
 {
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
-   LLVMValueRef x_offsets[QUAD_SIZE];
-   LLVMValueRef y_offsets[QUAD_SIZE];
-   unsigned i;
-
-   /*
-    * Derive from the quad's upper left scalar coordinates the coordinates for
-    * all other quad pixels
-    */
-
-   x0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, x0);
-   y0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, y0);
-
-   for(i = 0; i < QUAD_SIZE; ++i) {
-      x_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_x[i], 0);
-      y_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_y[i], 0);
-   }
 
-   x0 = LLVMBuildAdd(coeff_bld->builder, x0, LLVMConstVector(x_offsets, QUAD_SIZE), "");
-   y0 = LLVMBuildAdd(coeff_bld->builder, y0, LLVMConstVector(y_offsets, QUAD_SIZE), "");
-
-   x0 = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->vec_type, "");
-   y0 = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->vec_type, "");
-
-   lp_build_name(x0, "pos.x");
-   lp_build_name(y0, "pos.y");
-
-   bld->attribs[0][0] = x0;
-   bld->attribs[0][1] = y0;
-}
-
-
-/**
- * Update quad position values when moving to the next quad.
- */
-static void
-pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
-{
-   struct lp_build_context *coeff_bld = &bld->coeff_bld;
-   LLVMValueRef x = bld->attribs[0][0];
-   LLVMValueRef y = bld->attribs[0][1];
-   const int xstep = 2, ystep = 2;
-
-   if (quad_index == 1 || quad_index == 3) {
-      /* top-right or bottom-right quad in block */
-      /* build x += xstep */
-      x = lp_build_add(coeff_bld, x,
-                       lp_build_const_vec(coeff_bld->type, xstep));
-   }
-
-   if (quad_index == 2) {
-      /* bottom-left quad in block */
-      /* build y += ystep */
-      y = lp_build_add(coeff_bld, y,
-                       lp_build_const_vec(coeff_bld->type, ystep));
-      /* build x -= xstep */
-      x = lp_build_sub(coeff_bld, x,
-                       lp_build_const_vec(coeff_bld->type, xstep));
-   }
-
-   lp_build_name(x, "pos.x");
-   lp_build_name(y, "pos.y");
-
-   bld->attribs[0][0] = x;
-   bld->attribs[0][1] = y;
+   bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
+   bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
 }
 
 
@@ -401,7 +350,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 
    /* Position */
    bld->num_attribs = 1;
-   bld->mask[0] = TGSI_WRITEMASK_ZW;
+   bld->mask[0] = TGSI_WRITEMASK_XYZW;
    bld->interp[0] = LP_INTERP_LINEAR;
 
    /* Inputs */
@@ -418,11 +367,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
       }
    }
 
-   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
-
    pos_init(bld, x0, y0);
 
-   attribs_init(bld);
+   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+   attribs_update(bld, 0);
 }
 
 
@@ -435,7 +384,5 @@ lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
 {
    assert(quad_index < 4);
 
-   pos_update(bld, quad_index);
-
    attribs_update(bld, quad_index);
 }
index 45a430701fbc0c99838046194ef114b30dc9c916..29055133011f3f54358342c46ec2d7c5b3b69788 100644 (file)
@@ -58,12 +58,11 @@ struct lp_build_interp_soa_context
    unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
    enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS];
 
-   LLVMValueRef a0  [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-   LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef x;
+   LLVMValueRef y;
 
-   /* Attribute values before perspective divide */
-   LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef a   [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
 
    LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];