vc4: Avoid emitting small immediates for UBO indirect load address guards.
author Eric Anholt <eric@anholt.net>
Tue, 7 Feb 2017 01:30:59 +0000 (17:30 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 10 Feb 2017 22:17:04 +0000 (14:17 -0800)
The kernel will reject our shader if we emit one here, and having 4, 8, or
12 as the top end of our UBO clamp is rare enough that it's not worth
making the kernel let us.

Fixes piglit fs-const-array-of-struct and
fs-const-array-of-struct-of-array since recent GLSL linking changes made
us get this as an indirect load of a uniform, instead of a temporary.

Cc: "13.0 17.0" <mesa-stable@lists.freedesktop.org>
src/gallium/drivers/vc4/vc4_opt_small_immediates.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 89c48578021e7c01adfb586c0258bd750b9017ad..07eca71f23ecfb9987ceed8b379afecfe0d940f7 100644 (file)
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
                 if (uses_small_imm)
                         continue;
 
+                /* Don't propagate small immediates into the top-end bounds
+                 * checking for indirect UBO loads.  The kernel doesn't parse
+                 * small immediates and rejects the shader in this case.  UBO
+                 * loads are much more expensive than the uniform load, and
+                 * indirect UBO regions are usually much larger than a small
+                 * immediate, so it's not worth updating the kernel to allow
+                 * optimizing it.
+                 */
+                if (inst->op == QOP_MIN_NOIMM)
+                        continue;
+
                 for (int i = 0; i < qir_get_nsrc(inst); i++) {
                         struct qreg src = qir_follow_movs(c, inst->src[i]);
 
index a7cc668796285827778fb069f131562a2f805010..4865bcbd283337eb0c38a45fec7b896b25aa4f5e 100644 (file)
@@ -116,9 +116,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
 
         /* Clamp to [0, array size).  Note that MIN/MAX are signed. */
         indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
-        indirect_offset = qir_MIN(c, indirect_offset,
-                                  qir_uniform_ui(c, (range->dst_offset +
-                                                     range->size - 4)));
+        indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
+                                        qir_uniform_ui(c, (range->dst_offset +
+                                                           range->size - 4)));
 
         qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
                      indirect_offset,
@@ -382,7 +382,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
 
         /* Perform the clamping required by kernel validation. */
         addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
-        addr = qir_MIN(c, addr,  qir_uniform_ui(c, size - 4));
+        addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
 
         qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
                      addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
index d4f35d8f01a78b335c69c1af126ebebedb258e69..ed76c64ba9c7c351a9e1132fa9c0c102e66a6fc3 100644 (file)
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_ASR] = { "asr", 1, 2 },
         [QOP_SHL] = { "shl", 1, 2 },
         [QOP_MIN] = { "min", 1, 2 },
+        [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
         [QOP_MAX] = { "max", 1, 2 },
         [QOP_AND] = { "and", 1, 2 },
         [QOP_OR] = { "or", 1, 2 },
index e189bc32d940f46a510842e9e4f23b27863c609e..daeb52b9aee33395baea703ed46df37541ff9531 100644 (file)
@@ -123,6 +123,7 @@ enum qop {
         QOP_SHR,
         QOP_ASR,
         QOP_MIN,
+        QOP_MIN_NOIMM,
         QOP_MAX,
         QOP_AND,
         QOP_OR,
@@ -725,6 +726,7 @@ QIR_ALU2(SHL)
 QIR_ALU2(SHR)
 QIR_ALU2(ASR)
 QIR_ALU2(MIN)
+QIR_ALU2(MIN_NOIMM)
 QIR_ALU2(MAX)
 QIR_ALU2(AND)
 QIR_ALU2(OR)
index 47fc0b0928b2ad3dacdded4fc8839b6f37596df5..60ca87aa467f8f33a9b950653feee17181cf003f 100644 (file)
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
                         [QOP_MOV] = { QPU_A_OR },
                         [QOP_FMOV] = { QPU_A_FMAX },
                         [QOP_MMOV] = { QPU_M_V8MIN },
+
+                        [QOP_MIN_NOIMM] = { QPU_A_MIN },
                 };
 
                 uint64_t unpack = 0;