gallivm: fix indirect addressing of constant buffer
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_tgsi_soa.c
index d6e1c478be9b3d470a9a9b5161cbdfdbbc57f859..3515d268f8406880de00ae7189c4dd060f952613 100644 (file)
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_scan.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_gather.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_quad.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_limits.h"
 #include "lp_bld_debug.h"
 #define CHAN_Y 1
 #define CHAN_Z 2
 #define CHAN_W 3
-
-#define QUAD_TOP_LEFT     0
-#define QUAD_TOP_RIGHT    1
-#define QUAD_BOTTOM_LEFT  2
-#define QUAD_BOTTOM_RIGHT 3
+#define NUM_CHANNELS 4
 
 #define LP_MAX_INSTRUCTIONS 256
 
@@ -148,30 +145,6 @@ struct lp_build_tgsi_soa_context
    uint max_instructions;
 };
 
-static const unsigned char
-swizzle_left[4] = {
-   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
-   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
-};
-
-static const unsigned char
-swizzle_right[4] = {
-   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
-   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
-};
-
-static const unsigned char
-swizzle_top[4] = {
-   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
-   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
-};
-
-static const unsigned char
-swizzle_bottom[4] = {
-   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
-   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
-};
-
 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
 {
    mask->bld = bld;
@@ -432,25 +405,6 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
    lp_exec_mask_update(mask);
 }
 
-static LLVMValueRef
-emit_ddx(struct lp_build_tgsi_soa_context *bld,
-         LLVMValueRef src)
-{
-   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
-   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
-   return lp_build_sub(&bld->base, src_right, src_left);
-}
-
-
-static LLVMValueRef
-emit_ddy(struct lp_build_tgsi_soa_context *bld,
-         LLVMValueRef src)
-{
-   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
-   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
-   return lp_build_sub(&bld->base, src_top, src_bottom);
-}
-
 static LLVMValueRef
 get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
              unsigned index,
@@ -470,6 +424,38 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
    }
 }
 
+
+/**
+ * Gather a vector of scalars from memory, one load per vector element.
+ *
+ * For each of the bld->base.type.length elements, the matching element of
+ * 'indexes' is extracted, applied to 'base_ptr' with a single-index GEP,
+ * and the scalar loaded from the resulting address is inserted into the
+ * same element of the result.  No range check is applied to the indexes
+ * in this function.
+ *
+ * \param bld       build context; supplies the LLVM builder, the vector
+ *                  length and the undef value the result starts from
+ * \param base_ptr  pointer that every extracted index is applied to
+ * \param indexes   vector of indexes, one per result element
+ *                  (NOTE(review): presumably element indexes in units of
+ *                  base_ptr's pointee type, not byte offsets -- confirm)
+ * \return vector built from the loaded scalars
+ *
+ * XXX the lp_build_gather() function should be capable of doing this
+ * with a little work.
+ */
+static LLVMValueRef
+build_gather(struct lp_build_tgsi_soa_context *bld,
+             LLVMValueRef base_ptr,
+             LLVMValueRef indexes)
+{
+   LLVMValueRef res = bld->base.undef;
+   unsigned i;
+
+   /*
+    * Loop over elements of 'indexes', load scalar value, insert it into 'res'.
+    * The same constant 'ii' selects the element to read from 'indexes' and
+    * the element of 'res' to write.
+    */
+   for (i = 0; i < bld->base.type.length; i++) {
+      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
+                                                   indexes, ii, "");
+      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
+                                             &index, 1, "");
+      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
+   }
+
+   return res;
+}
+
+
 /**
  * Register fetch.
  */
@@ -484,7 +470,7 @@ emit_fetch(
    const unsigned swizzle =
       tgsi_util_get_full_src_register_swizzle(reg, chan_index);
    LLVMValueRef res;
-   LLVMValueRef addr = NULL;
+   LLVMValueRef addr_vec = NULL;
 
    if (swizzle > 3) {
       assert(0 && "invalid swizzle in emit_fetch()");
@@ -494,35 +480,51 @@ emit_fetch(
    if (reg->Register.Indirect) {
       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
       unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
-      addr = LLVMBuildLoad(bld->base.builder,
-                           bld->addr[reg->Indirect.Index][swizzle],
-                           "");
+
+      LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4); 
+
+      assert(bld->has_indirect_addressing);
+
+      addr_vec = LLVMBuildLoad(bld->base.builder,
+                               bld->addr[reg->Indirect.Index][swizzle],
+                               "load addr");
+
       /* for indexing we want integers */
-      addr = LLVMBuildFPToSI(bld->base.builder, addr,
-                             int_vec_type, "");
-      addr = LLVMBuildExtractElement(bld->base.builder,
-                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
-                                     "");
-      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+      addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
+                                 int_vec_type, "");
+
+      /* addr_vec = addr_vec * 4 */
+      addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
    }
 
    switch (reg->Register.File) {
    case TGSI_FILE_CONSTANT:
       {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
-                                           reg->Register.Index*4 + swizzle, 0);
-         LLVMValueRef scalar, scalar_ptr;
-
          if (reg->Register.Indirect) {
-            /*lp_build_printf(bld->base.builder,
-              "\taddr = %d\n", addr);*/
-            index = lp_build_add(&bld->base, index, addr);
+            LLVMValueRef index_vec;  /* index into the const buffer */
+
+            /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
+            index_vec = lp_build_const_int_vec(bld->int_bld.type,
+                                   reg->Register.Index * 4 + swizzle);
+
+            /* index_vec = index_vec + addr_vec */
+            index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+
+            /* Gather values from the constant buffer */
+            res = build_gather(bld, bld->consts_ptr, index_vec);
          }
-         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
-                                   &index, 1, "");
-         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+         else {
+            LLVMValueRef index;  /* index into the const buffer */
+            LLVMValueRef scalar, scalar_ptr;
 
-         res = lp_build_broadcast_scalar(&bld->base, scalar);
+            index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
+
+            scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
+                                      &index, 1, "");
+            scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+            res = lp_build_broadcast_scalar(&bld->base, scalar);
+         }
       }
       break;
 
@@ -538,10 +540,19 @@ emit_fetch(
 
    case TGSI_FILE_TEMPORARY:
       {
-         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
-                                              swizzle,
-                                              reg->Register.Indirect,
-                                              addr);
+         LLVMValueRef addr = NULL;
+         LLVMValueRef temp_ptr;
+
+         if (reg->Register.Indirect) {
+            LLVMValueRef zero = lp_build_const_int32(0);
+            addr = LLVMBuildExtractElement(bld->base.builder,
+                                           addr_vec, zero, "");
+         }
+
+         temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+                                 swizzle,
+                                 reg->Register.Indirect,
+                                 addr);
          res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
          if(!res)
             return bld->base.undef;
@@ -599,10 +610,10 @@ emit_fetch_deriv(
    /* TODO: use interpolation coeffs for inputs */
 
    if(ddx)
-      *ddx = emit_ddx(bld, src);
+      *ddx = lp_build_ddx(&bld->base, src);
 
    if(ddy)
-      *ddy = emit_ddy(bld, src);
+      *ddy = lp_build_ddy(&bld->base, src);
 }
 
 
@@ -842,8 +853,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       unit = inst->Src[3].Register.Index;
    }  else {
       for (i = 0; i < num_coords; i++) {
-         ddx[i] = emit_ddx( bld, coords[i] );
-         ddy[i] = emit_ddy( bld, coords[i] );
+         ddx[i] = lp_build_ddx( &bld->base, coords[i] );
+         ddy[i] = lp_build_ddy( &bld->base, coords[i] );
       }
       unit = inst->Src[1].Register.Index;
    }
@@ -953,10 +964,10 @@ emit_declaration(
       case TGSI_FILE_TEMPORARY:
          assert(idx < LP_MAX_TGSI_TEMPS);
          if (bld->has_indirect_addressing) {
-            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
-                                            last*4 + 4, 0);
+            LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
+                                                   last*4 + 4, 0);
             bld->temps_array = lp_build_array_alloca(bld->base.builder,
-                                                     vec_type, val, "");
+                                                     vec_type, array_size, "");
          } else {
             for (i = 0; i < NUM_CHANNELS; i++)
                bld->temps[idx][i] = lp_build_alloca(bld->base.builder,