tu: ir3: Emit push constants directly
authorConnor Abbott <cwabbott0@gmail.com>
Wed, 18 Mar 2020 12:12:31 +0000 (13:12 +0100)
committerMarge Bot <eric+marge@anholt.net>
Thu, 9 Apr 2020 15:56:55 +0000 (15:56 +0000)
Carve out some space at the beginning for push constants, and push them
directly, rather than remapping them to a UBO and then relying on the
UBO pushing code. Remapping to a UBO is easy now, where there's a single
table of UBO's, but with the bindless model it'll be a lot harder. I
haven't removed all the code to move the remaining UBO's over by 1,
though, because it's going to all get rewritten with bindless anyways.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>

src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_pipeline.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_shader.c

index a171cbe7a3c6587d548447368162c8d4e61e0f15..64599669f02c5c2e5f38d76519627938c43dd448 100644 (file)
@@ -235,7 +235,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
         * first.
         */
        const uint32_t max_upload = 16 * 1024;
-       uint32_t offset = 0;
+       uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
        for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
                uint32_t range_size = state->range[i].end - state->range[i].start;
 
index 9089789b34c70122643c786fa3976b89698a1f01..280dc1c784be5773e26d20882ada7a2effa87372 100644 (file)
@@ -75,7 +75,7 @@ enum ir3_driver_param {
 
 /**
  * Describes the layout of shader consts.  This includes:
- *   + Driver lowered UBO ranges
+ *   + User consts + driver lowered UBO ranges
  *   + SSBO sizes
  *   + Image sizes/dimensions
  *   + Driver params (ie. IR3_DP_*)
@@ -114,6 +114,7 @@ enum ir3_driver_param {
  */
 struct ir3_const_state {
        unsigned num_ubos;
+       unsigned num_reserved_user_consts;
        unsigned num_driver_params;   /* scalar */
 
        struct {
index 0583be32ecd2347daf9fe025b06b5a92e3dc07c0..0bf8f56f8bfe44600f3dcd9a24a1eddcbdb54af1 100644 (file)
@@ -2676,6 +2676,21 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
       &pipeline->program.link[type];
    const struct ir3_ubo_analysis_state *state = &link->ubo_state;
 
+   if (link->push_consts.count > 0) {
+      unsigned num_units = link->push_consts.count;
+      unsigned offset = link->push_consts.lo;
+      tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units * 4);
+      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+            CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+            CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+            CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+            CP_LOAD_STATE6_0_NUM_UNIT(num_units));
+      tu_cs_emit(cs, 0);
+      tu_cs_emit(cs, 0);
+      for (unsigned i = 0; i < num_units * 4; i++)
+         tu_cs_emit(cs, push_constants[i + offset * 4]);
+   }
+
    for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
       if (state->range[i].start < state->range[i].end) {
          uint32_t size = state->range[i].end - state->range[i].start;
@@ -2694,21 +2709,6 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
          debug_assert((size % 16) == 0);
          debug_assert((offset % 16) == 0);
 
-         if (i == 0) {
-            /* push constants */
-            tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4));
-            tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
-                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
-                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
-                  CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
-            tu_cs_emit(cs, 0);
-            tu_cs_emit(cs, 0);
-            for (unsigned i = 0; i < size / 4; i++)
-               tu_cs_emit(cs, push_constants[i + offset / 4]);
-            continue;
-         }
-
          /* Look through the UBO map to find our UBO index, and get the VA for
           * that UBO.
           */
index adf848d56975f464dfb00e8fadd406a70ac48b45..5d36dfcaf3fc41fab1bb8a72968bd7dc9400d72e 100644 (file)
@@ -1948,6 +1948,7 @@ tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
    link->ubo_state = v->shader->ubo_state;
    link->const_state = v->shader->const_state;
    link->constlen = v->constlen;
+   link->push_consts = shader->push_consts;
    link->texture_map = shader->texture_map;
    link->sampler_map = shader->sampler_map;
    link->ubo_map = shader->ubo_map;
index aecc294b79fad0c1a8b3394d330c3bb596276fc3..3c50b2ec0194b387be19ee19e8ff9b0b1fd5bbcf 100644 (file)
@@ -1129,10 +1129,17 @@ struct tu_descriptor_map
    int array_size[128];
 };
 
+struct tu_push_constant_range
+{
+   uint32_t lo;
+   uint32_t count;
+};
+
 struct tu_shader
 {
    struct ir3_shader ir3_shader;
 
+   struct tu_push_constant_range push_consts;
    struct tu_descriptor_map texture_map;
    struct tu_descriptor_map sampler_map;
    struct tu_descriptor_map ubo_map;
@@ -1181,6 +1188,7 @@ struct tu_program_descriptor_linkage
 
    uint32_t constlen;
 
+   struct tu_push_constant_range push_consts;
    struct tu_descriptor_map texture_map;
    struct tu_descriptor_map sampler_map;
    struct tu_descriptor_map ubo_map;
index a8a5303d6ca2dce975b3e2d3b591c6059fcc1345..deb6d895feb49c067e87535716b4b5e5eadeff41 100644 (file)
@@ -210,17 +210,16 @@ static void
 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                          struct tu_shader *shader)
 {
-   /* note: ir3 wants load_ubo, not load_uniform */
-   assert(nir_intrinsic_base(instr) == 0);
-
    nir_intrinsic_instr *load =
-      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
-
-   nir_intrinsic_set_align(load, 4, 0);
-
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
    load->num_components = instr->num_components;
-   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
-   load->src[1] = instr->src[0];
+   uint32_t base = nir_intrinsic_base(instr);
+   assert(base % 4 == 0);
+   assert(base >= shader->push_consts.lo * 16);
+   base -= shader->push_consts.lo * 16;
+   nir_intrinsic_set_base(load, base / 4);
+   load->src[0] =
+      nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
    nir_ssa_dest_init(&load->instr, &load->dest,
                      load->num_components, instr->dest.ssa.bit_size,
                      instr->dest.ssa.name);
@@ -343,6 +342,55 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
    }
 }
 
+/* Figure out the range of push constants that we're actually going to push to
+ * the shader, and tell the backend to reserve this range when pushing UBO
+ * constants.
+ */
+
+static void
+gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
+{
+   uint32_t min = UINT32_MAX, max = 0;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
+               continue;
+
+            uint32_t base = nir_intrinsic_base(intrin);
+            uint32_t range = nir_intrinsic_range(intrin);
+            min = MIN2(min, base);
+            max = MAX2(max, base + range);
+            break;
+         }
+      }
+   }
+
+   if (min >= max) {
+      tu_shader->push_consts.lo = 0;
+      tu_shader->push_consts.count = 0;
+      tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0;
+      return;
+   }
+
+   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
+    * however there's an alignment requirement of 4 on OFFSET. Expand the
+    * range and change units accordingly.
+    */
+   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
+   tu_shader->push_consts.count =
+      align(max, 16) / 16 - tu_shader->push_consts.lo;
+   tu_shader->ir3_shader.const_state.num_reserved_user_consts = 
+      align(tu_shader->push_consts.count, 4);
+}
+
 static bool
 lower_impl(nir_function_impl *impl, struct tu_shader *shader,
             const struct tu_pipeline_layout *layout)
@@ -376,6 +424,8 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
 {
    bool progress = false;
 
+   gather_push_constants(shader, tu_shader);
+
    nir_foreach_function(function, shader) {
       if (function->impl)
          progress |= lower_impl(function->impl, tu_shader, layout);