v3d: Move constant offsets to UBO addresses into the main uniform stream.
author Eric Anholt <eric@anholt.net>
Tue, 19 Mar 2019 18:39:58 +0000 (11:39 -0700)
committer Eric Anholt <eric@anholt.net>
Thu, 21 Mar 2019 21:20:50 +0000 (14:20 -0700)
We'd end up with the constant offsets in the uniform stream anyway, since
they're bigger than small immediates.  This avoids the extra uniforms and
ADD instructions in the shader in favor of just adding once on the CPU.

shader-db:
total instructions in shared programs: 6496865 -> 6494851 (-0.03%)
total uniforms in shared programs: 2119511 -> 2117243 (-0.11%)

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir_dump.c
src/gallium/drivers/v3d/v3d_screen.c
src/gallium/drivers/v3d/v3d_uniforms.c

index e2407f787c79651047dcfa6a527572cb8a4e897f..846a2a704af9f16c56dfe99345936a59fdfa7b76 100644 (file)
@@ -219,6 +219,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 }
         }
 
+        uint32_t const_offset = 0;
+        if (nir_src_is_const(instr->src[offset_src]))
+                const_offset = nir_src_as_uint(instr->src[offset_src]);
+
         /* Make sure we won't exceed the 16-entry TMU fifo if each thread is
          * storing at the same time.
          */
@@ -227,8 +231,6 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
 
         struct qreg offset;
         if (instr->intrinsic == nir_intrinsic_load_uniform) {
-                offset = vir_uniform(c, QUNIFORM_UBO_ADDR, 0);
-
                 /* Find what variable in the default uniform block this
                  * uniform load is coming from.
                  */
@@ -252,16 +254,19 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         c->next_ubo_dst_offset += range->size;
                 }
 
-                base = base - range->src_offset + range->dst_offset;
+                const_offset += base - range->src_offset + range->dst_offset;
 
-                if (base != 0)
-                        offset = vir_ADD(c, offset, vir_uniform_ui(c, base));
+                offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
+                                     v3d_unit_data_create(0, const_offset));
+                const_offset = 0;
         } else if (instr->intrinsic == nir_intrinsic_load_ubo) {
+                uint32_t index = nir_src_as_uint(instr->src[0]) + 1;
                 /* Note that QUNIFORM_UBO_ADDR takes a UBO index shifted up by
                  * 1 (0 is gallium's constant buffer 0).
                  */
                 offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
-                                     nir_src_as_uint(instr->src[0]) + 1);
+                                     v3d_unit_data_create(index, const_offset));
+                const_offset = 0;
         } else if (is_shared) {
                 /* Shared variables have no buffer index, and all start from a
                  * common base that we set up at the start of dispatch
@@ -295,8 +300,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 dest = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUAU);
 
         struct qinst *tmu;
-        if (nir_src_is_const(instr->src[offset_src]) &&
-            nir_src_as_uint(instr->src[offset_src]) == 0) {
+        if (nir_src_is_const(instr->src[offset_src]) && const_offset == 0) {
                 tmu = vir_MOV_dest(c, dest, offset);
         } else {
                 tmu = vir_ADD_dest(c, dest,
index 81307e0996fe32d2c709601efe7341fcbc0f2d27..d8d916fb14b6d4ae29ea4e0a095e1535f02009d4 100644 (file)
@@ -283,6 +283,7 @@ enum quniform_contents {
 
 static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
 {
+        assert(value < (1 << 24));
         return unit << 24 | value;
 }
 
index be98539b0be7fb098c10b584f6f1736d84158803..e8a2879fbe258565f7cc9b404432b6101fe10bc8 100644 (file)
@@ -99,7 +99,9 @@ vir_dump_uniform(enum quniform_contents contents,
                 break;
 
         case QUNIFORM_UBO_ADDR:
-                fprintf(stderr, "ubo[%d]", data);
+                fprintf(stderr, "ubo[%d]+0x%x",
+                        v3d_unit_data_get_unit(data),
+                        v3d_unit_data_get_offset(data));
                 break;
 
         case QUNIFORM_SSBO_OFFSET:
index c810fbc98b4c19293feaa94e15978f74dcb60664..073bdf51449fe2cbcf4e07a25ba581f93d0b48a0 100644 (file)
@@ -279,6 +279,9 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         case PIPE_SHADER_CAP_MAX_TEMPS:
                 return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
         case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+                /* Note: Limited by the offset size in
+                 * v3d_unit_data_create().
+                 */
                 return 16 * 1024 * sizeof(float);
         case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
                 return 16;
index 371568b31e6e7875884de9ae8a7d6519c8582aa8..b48f6526d61b15f970d68a1a5ea1f7226df7c8cc 100644 (file)
@@ -334,13 +334,14 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                                 cl_aligned_reloc(&job->indirect, &uniforms,
                                                  ubo, 0);
                         } else {
-                                int ubo_index = data;
+                                int ubo_index = v3d_unit_data_get_unit(data);
                                 struct v3d_resource *rsc =
                                         v3d_resource(cb->cb[ubo_index].buffer);
 
                                 cl_aligned_reloc(&job->indirect, &uniforms,
                                                  rsc->bo,
-                                                 cb->cb[ubo_index].buffer_offset);
+                                                 cb->cb[ubo_index].buffer_offset +
+                                                 v3d_unit_data_get_offset(data));
                         }
                         break;