gallivm: allow large numbers of temporaries
author Zack Rusin <zackr@vmware.com>
Tue, 4 Feb 2014 02:40:24 +0000 (21:40 -0500)
committer Zack Rusin <zackr@vmware.com>
Thu, 6 Feb 2014 00:40:53 +0000 (19:40 -0500)
The number of allowed temporaries increases with almost every
iteration of an API. We used to support 128, then we started
increasing the limit, and the newer APIs support 4096+. So if we
notice that the number of temporaries is larger than our statically
allocated storage allows, we just treat them as indexable
temporaries and allocate them as an array from the start.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_limits.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 521b45b360af92f7ac918613a393afb4c026a307..e03bac640dfad3cd164a40bb78d954c20c76e9fe 100644 (file)
@@ -43,7 +43,7 @@
  * the state trackers.
  */
 
-#define LP_MAX_TGSI_TEMPS 256
+#define LP_MAX_TGSI_TEMPS 4096
 
 #define LP_MAX_TGSI_ADDRS 16
 
 
 #define LP_MAX_TGSI_CONST_BUFFERS 16
 
+/*
+ * For quick access we cache temps in a statically
+ * allocated array. This defines the maximum size
+ * of that array.
+ */
+#define LP_MAX_INLINED_TEMPS 256
 
 /**
  * Maximum control flow nesting
index 1a939517306d063004d7b8b9c5b69c5d54722d6b..e0a7c5dc1ab4fe82f39cc45d26a42f82039e8ce7 100644 (file)
@@ -445,7 +445,7 @@ struct lp_build_tgsi_soa_context
    struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
    LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
-   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
+   LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS];
    LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
    LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
 
@@ -537,7 +537,7 @@ struct lp_build_tgsi_aos_context
    struct lp_build_sampler_aos *sampler;
 
    LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
-   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+   LLVMValueRef temps[LP_MAX_INLINED_TEMPS];
    LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
    LLVMValueRef preds[LP_MAX_TGSI_PREDS];
 
index c51fde052735a500002fb394b5fe41c77dfcb7b2..fd5df0eb52f5f8317faa3200fe470476dd4a8d3e 100644 (file)
@@ -406,7 +406,7 @@ lp_emit_declaration_aos(
    for (idx = first; idx <= last; ++idx) {
       switch (decl->Declaration.File) {
       case TGSI_FILE_TEMPORARY:
-         assert(idx < LP_MAX_TGSI_TEMPS);
+         assert(idx < LP_MAX_INLINED_TEMPS);
          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
index 9db41a9a4326a25ec2dbc417036110d375b9beb5..3ba20314203abc045476668b93b162c7542b49f6 100644 (file)
@@ -2672,8 +2672,8 @@ lp_emit_declaration_soa(
       assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
       switch (decl->Declaration.File) {
       case TGSI_FILE_TEMPORARY:
-         assert(idx < LP_MAX_TGSI_TEMPS);
          if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
+            assert(idx < LP_MAX_INLINED_TEMPS);
             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
          }
@@ -3621,6 +3621,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.info = info;
    bld.indirect_files = info->indirect_files;
 
+   /*
+    * If the number of temporaries is rather large then we just
+    * allocate them as an array right from the start and treat
+    * like indirect temporaries.
+    */
+   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
+      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
+   }
+
    bld.bld_base.soa = TRUE;
    bld.bld_base.emit_debug = emit_debug;
    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;