gallivm: handle huge number of immediates
author Zack Rusin <zackr@vmware.com>
Wed, 5 Feb 2014 00:28:58 +0000 (19:28 -0500)
committer Zack Rusin <zackr@vmware.com>
Thu, 6 Feb 2014 00:40:53 +0000 (19:40 -0500)
We only supported up to 256 immediates, which isn't enough. We
already had code that stored immediates in a dynamically allocated
array, but it was always used alongside a statically allocated array
for performance reasons. This commit adds code to skip that
performance optimization and use only the dynamically allocated
immediates when there are too many of them to fit in the static
array (LP_MAX_INLINED_IMMEDIATES, 256). The overall limit,
LP_MAX_TGSI_IMMEDIATES, is raised to 4096.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_limits.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index e03bac640dfad3cd164a40bb78d954c20c76e9fe..87be3511d9495239b32fb5a08149439d9c83edb8 100644 (file)
 
 #define LP_MAX_TGSI_ADDRS 16
 
-#define LP_MAX_TGSI_IMMEDIATES 256
+#define LP_MAX_TGSI_IMMEDIATES 4096
 
 #define LP_MAX_TGSI_PREDS 16
 
 #define LP_MAX_TGSI_CONST_BUFFERS 16
 
 /*
- * For quick access we cache temps in a statically
- * allocated array. This defines the maximum size
- * of that array.
+ * For quick access we cache registers in statically
+ * allocated arrays. Here we define the maximum size
+ * for those arrays.
  */
 #define LP_MAX_INLINED_TEMPS 256
 
+#define LP_MAX_INLINED_IMMEDIATES 256
+
 /**
  * Maximum control flow nesting
  *
index e0a7c5dc1ab4fe82f39cc45d26a42f82039e8ce7..ffd6e874a89d1d90f924e3792201cce957639e01 100644 (file)
@@ -444,7 +444,7 @@ struct lp_build_tgsi_soa_context
 
    struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
-   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
+   LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS];
    LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS];
    LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
    LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
@@ -482,7 +482,7 @@ struct lp_build_tgsi_soa_context
    struct lp_exec_mask exec_mask;
 
    uint num_immediates;
-
+   boolean use_immediates_array;
 };
 
 void
@@ -536,7 +536,7 @@ struct lp_build_tgsi_aos_context
 
    struct lp_build_sampler_aos *sampler;
 
-   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+   LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES];
    LLVMValueRef temps[LP_MAX_INLINED_TEMPS];
    LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
    LLVMValueRef preds[LP_MAX_TGSI_PREDS];
index fd5df0eb52f5f8317faa3200fe470476dd4a8d3e..4dee9bb4dd41e0d3c8c3495803282c5c7d6dbeb9 100644 (file)
@@ -1042,7 +1042,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
             float imm[4];
             assert(size <= 4);
-            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
+            assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
             for (chan = 0; chan < 4; ++chan) {
                imm[chan] = 0.0f;
             }
index 3ba20314203abc045476668b93b162c7542b49f6..d2cb0a0975f45ad3f9d1eb74f6232fa819ec395b 100644 (file)
@@ -1295,33 +1295,42 @@ emit_fetch_immediate(
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef res = NULL;
 
-   if (reg->Register.Indirect) {
-      LLVMValueRef indirect_index;
-      LLVMValueRef index_vec;  /* index into the immediate register array */
+   if (bld->use_immediates_array || reg->Register.Indirect) {
       LLVMValueRef imms_array;
       LLVMTypeRef fptr_type;
 
-      indirect_index = get_indirect_index(bld,
-                                          reg->Register.File,
-                                          reg->Register.Index,
-                                          &reg->Indirect);
-      /*
-       * Unlike for other reg classes, adding pixel offsets is unnecessary -
-       * immediates are stored as full vectors (FIXME??? - might be better
-       * to store them the same as constants) but all elements are the same
-       * in any case.
-       */
-      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
-                                        indirect_index,
-                                        swizzle,
-                                        FALSE);
-
       /* cast imms_array pointer to float* */
       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
 
-      /* Gather values from the immediate register array */
-      res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
+      if (reg->Register.Indirect) {
+         LLVMValueRef indirect_index;
+         LLVMValueRef index_vec;  /* index into the immediate register array */
+
+         indirect_index = get_indirect_index(bld,
+                                             reg->Register.File,
+                                             reg->Register.Index,
+                                             &reg->Indirect);
+         /*
+          * Unlike for other reg classes, adding pixel offsets is unnecessary -
+          * immediates are stored as full vectors (FIXME??? - might be better
+          * to store them the same as constants) but all elements are the same
+          * in any case.
+          */
+         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+                                           indirect_index,
+                                           swizzle,
+                                           FALSE);
+
+         /* Gather values from the immediate register array */
+         res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
+      } else {
+         LLVMValueRef lindex = lp_build_const_int32(gallivm,
+                                        reg->Register.Index * 4 + swizzle);
+         LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
+                                                bld->imms_array, &lindex, 1, "");
+         res = LLVMBuildLoad(builder, imms_ptr, "");
+      }
    }
    else {
       res = bld->immediates[reg->Register.Index][swizzle];
@@ -2728,51 +2737,71 @@ void lp_emit_immediate_soa(
 {
    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
    struct gallivm_state * gallivm = bld_base->base.gallivm;
-
-   /* simply copy the immediate values into the next immediates[] slot */
+   LLVMValueRef imms[4];
    unsigned i;
    const uint size = imm->Immediate.NrTokens - 1;
    assert(size <= 4);
-   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
    switch (imm->Immediate.DataType) {
    case TGSI_IMM_FLOAT32:
       for( i = 0; i < size; ++i )
-         bld->immediates[bld->num_immediates][i] =
-            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
+         imms[i] =
+               lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
 
       break;
    case TGSI_IMM_UINT32:
       for( i = 0; i < size; ++i ) {
          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
-         bld->immediates[bld->num_immediates][i] =
-            LLVMConstBitCast(tmp, bld_base->base.vec_type);
+         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
       }
 
       break;
    case TGSI_IMM_INT32:
       for( i = 0; i < size; ++i ) {
          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
-         bld->immediates[bld->num_immediates][i] =
-            LLVMConstBitCast(tmp, bld_base->base.vec_type);
+         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
       }
-            
+
       break;
    }
    for( i = size; i < 4; ++i )
-      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
+      imms[i] = bld_base->base.undef;
 
-   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+   if (bld->use_immediates_array) {
       unsigned index = bld->num_immediates;
       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
       LLVMBuilderRef builder = gallivm->builder;
+
+      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
       for (i = 0; i < 4; ++i ) {
          LLVMValueRef lindex = lp_build_const_int32(
-            bld->bld_base.base.gallivm, index * 4 + i);
+                  bld->bld_base.base.gallivm, index * 4 + i);
          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
-         LLVMBuildStore(builder, 
-                        bld->immediates[index][i],
-                        imm_ptr);
+         LLVMBuildStore(builder, imms[i], imm_ptr);
+      }
+   } else {
+      /* simply copy the immediate values into the next immediates[] slot */
+      unsigned i;
+      const uint size = imm->Immediate.NrTokens - 1;
+      assert(size <= 4);
+      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
+
+      for(i = 0; i < 4; ++i )
+         bld->immediates[bld->num_immediates][i] = imms[i];
+
+      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+         unsigned index = bld->num_immediates;
+         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+         LLVMBuilderRef builder = gallivm->builder;
+         for (i = 0; i < 4; ++i ) {
+            LLVMValueRef lindex = lp_build_const_int32(
+                     bld->bld_base.base.gallivm, index * 4 + i);
+            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
+                                                bld->imms_array, &lindex, 1, "");
+            LLVMBuildStore(builder,
+                           bld->immediates[index][i],
+                           imm_ptr);
+         }
       }
    }
 
@@ -3629,6 +3658,17 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
    }
+   /*
+    * For performance reason immediates are always backed in a static
+    * array, but if their number is too great, we have to use just
+    * a dynamically allocated array.
+    */
+   bld.use_immediates_array =
+         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
+   if (bld.use_immediates_array) {
+      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
+   }
+
 
    bld.bld_base.soa = TRUE;
    bld.bld_base.emit_debug = emit_debug;