gallivm: implement indirect addressing over temporaries
authorZack Rusin <zackr@vmware.com>
Thu, 22 Apr 2010 22:36:07 +0000 (18:36 -0400)
committerZack Rusin <zackr@vmware.com>
Thu, 22 Apr 2010 22:36:07 +0000 (18:36 -0400)
a bit more involved than indirect addressing over consts, but still
fairly reasonable. we allocate an array instead of individual alloca's,
and we do it only if the shader does indirect addressing.

src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_state_fs.c

index 9d1103176981af7d7bb9f010e8218f5606891196..5e8bef4f5ea333fda0c7caadcfff1b992ad8bef2 100644 (file)
@@ -252,7 +252,8 @@ generate_vs(struct draw_llvm *llvm,
                      NULL /*pos*/,
                      inputs,
                      outputs,
-                     NULL/*sampler*/);
+                     NULL/*sampler*/,
+                     &llvm->draw->vs.vertex_shader->info);
 }
 
 #if DEBUG_STORE
index 63b938bfa98421b4ac7e0a583aff3caad44e9cf3..2eac5da6c69e651de30f37bb7063a91a9e87937b 100644 (file)
@@ -39,6 +39,7 @@
 
 
 struct tgsi_token;
+struct tgsi_shader_info;
 struct lp_type;
 struct lp_build_context;
 struct lp_build_mask_context;
@@ -78,7 +79,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const LLVMValueRef *pos,
                   const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
-                  struct lp_build_sampler_soa *sampler);
+                  struct lp_build_sampler_soa *sampler,
+                  struct tgsi_shader_info *info);
 
 
 #endif /* LP_BLD_TGSI_H */
index 72ed8c089a8546f255881275df9a22db8950678b..df2e24c2c90cbbf806f4523c2f83133f3b9079cc 100644 (file)
@@ -46,6 +46,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_scan.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
@@ -127,6 +128,11 @@ struct lp_build_tgsi_soa_context
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
    LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS];
 
+   /* we allocate an array of temps if we have indirect
+    * addressing and then the temps above is unused */
+   LLVMValueRef temps_array;
+   boolean has_indirect_addressing;
+
    struct lp_build_mask_context *mask;
    struct lp_exec_mask exec_mask;
 };
@@ -358,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld,
    return lp_build_sub(&bld->base, src_top, src_bottom);
 }
 
+static LLVMValueRef
+get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
+             unsigned index,
+             unsigned swizzle,
+             boolean is_indirect,
+             LLVMValueRef addr)
+{
+   if (!bld->has_indirect_addressing) {
+      return bld->temps[index][swizzle];
+   } else {
+      LLVMValueRef lindex =
+         LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
+      if (is_indirect)
+         lindex = lp_build_add(&bld->base, lindex, addr);
+      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
+   }
+}
 
 /**
  * Register fetch.
@@ -392,6 +415,7 @@ emit_fetch(
          addr = LLVMBuildExtractElement(bld->base.builder,
                                         addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
                                         "");
+         addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
       }
 
       switch (reg->Register.File) {
@@ -402,7 +426,6 @@ emit_fetch(
          if (reg->Register.Indirect) {
             /*lp_build_printf(bld->base.builder,
               "\taddr = %d\n", addr);*/
-            addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
             index = lp_build_add(&bld->base, index, addr);
          }
          scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
@@ -422,11 +445,16 @@ emit_fetch(
          assert(res);
          break;
 
-      case TGSI_FILE_TEMPORARY:
-         res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
+      case TGSI_FILE_TEMPORARY: {
+         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+                                              swizzle,
+                                              reg->Register.Indirect,
+                                              addr);
+         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
          if(!res)
             return bld->base.undef;
          break;
+      }
 
       default:
          assert( 0 );
@@ -504,6 +532,7 @@ emit_store(
    LLVMValueRef value)
 {
    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+   LLVMValueRef addr;
 
    switch( inst->Instruction.Saturate ) {
    case TGSI_SAT_NONE:
@@ -523,16 +552,35 @@ emit_store(
       assert(0);
    }
 
+   if (reg->Register.Indirect) {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+      addr = LLVMBuildLoad(bld->base.builder,
+                           bld->addr[reg->Indirect.Index][swizzle],
+                           "");
+      /* for indexing we want integers */
+      addr = LLVMBuildFPToSI(bld->base.builder, addr,
+                             int_vec_type, "");
+      addr = LLVMBuildExtractElement(bld->base.builder,
+                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
+                                     "");
+      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+   }
+
    switch( reg->Register.File ) {
    case TGSI_FILE_OUTPUT:
       lp_exec_mask_store(&bld->exec_mask, value,
                          bld->outputs[reg->Register.Index][chan_index]);
       break;
 
-   case TGSI_FILE_TEMPORARY:
-      lp_exec_mask_store(&bld->exec_mask, value,
-                         bld->temps[reg->Register.Index][chan_index]);
+   case TGSI_FILE_TEMPORARY: {
+      LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+                                           chan_index,
+                                           reg->Register.Indirect,
+                                           addr);
+      lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
       break;
+   }
 
    case TGSI_FILE_ADDRESS:
       lp_exec_mask_store(&bld->exec_mask, value,
@@ -691,30 +739,6 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
    lp_build_mask_update(bld->mask, mask);
 }
 
-
-/**
- * Check if inst src/dest regs use indirect addressing into temporary
- * register file.
- */
-static boolean
-indirect_temp_reference(const struct tgsi_full_instruction *inst)
-{
-   uint i;
-   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *reg = &inst->Src[i];
-      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
-          reg->Register.Indirect)
-         return TRUE;
-   }
-   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
-      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
-          reg->Register.Indirect)
-         return TRUE;
-   }
-   return FALSE;
-}
-
 static int
 emit_declaration(
    struct lp_build_tgsi_soa_context *bld,
@@ -740,8 +764,16 @@ emit_declaration(
    for (idx = first; idx <= last; ++idx) {
       switch (decl->Declaration.File) {
       case TGSI_FILE_TEMPORARY:
-         for (i = 0; i < NUM_CHANNELS; i++)
-            bld->temps[idx][i] = lp_build_alloca(&bld->base);
+         if (bld->has_indirect_addressing) {
+            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
+                                            last*4 + 4, 0);
+            bld->temps_array = LLVMBuildArrayAlloca(bld->base.builder,
+                                                    lp_build_vec_type(bld->base.type),
+                                                    val, "");
+         } else {
+            for (i = 0; i < NUM_CHANNELS; i++)
+               bld->temps[idx][i] = lp_build_alloca(&bld->base);
+         }
          break;
 
       case TGSI_FILE_OUTPUT:
@@ -787,10 +819,6 @@ emit_instruction(
    LLVMValueRef res;
    LLVMValueRef dst0[NUM_CHANNELS];
 
-   /* we can't handle indirect addressing into temp register file yet */
-   if (indirect_temp_reference(inst))
-      return FALSE;
-
    /*
     * Stores and write masks are handled in a general fashion after the long
     * instruction opcode switch statement.
@@ -1742,7 +1770,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const LLVMValueRef *pos,
                   const LLVMValueRef (*inputs)[NUM_CHANNELS],
                   LLVMValueRef (*outputs)[NUM_CHANNELS],
-                  struct lp_build_sampler_soa *sampler)
+                  struct lp_build_sampler_soa *sampler,
+                  struct tgsi_shader_info *info)
 {
    struct lp_build_tgsi_soa_context bld;
    struct tgsi_parse_context parse;
@@ -1758,6 +1787,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.sampler = sampler;
+   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
+                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;
 
    lp_exec_mask_init(&bld.exec_mask, &bld.base);
 
index 551c3757bbbe3a5061a2569dabfb6dc48c96f762..2c4303a8957026270933a9ea4f724908180da08c 100644 (file)
@@ -474,7 +474,7 @@ generate_fs(struct llvmpipe_context *lp,
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
                      consts_ptr, interp->pos, interp->inputs,
-                     outputs, sampler);
+                     outputs, sampler, &shader->info);
 
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {