intel/compiler: Replace cs_prog_data->push.total with a helper
author: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Sat, 21 Mar 2020 04:02:06 +0000 (21:02 -0700)
committer: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 10 Apr 2020 02:23:12 +0000 (19:23 -0700)
The push.total field had three values but only one was directly
used (size).  Replace it with a helper function that explicitly takes
the cs_prog_data and the number of threads -- and use that in the
drivers.

This is preparation for ARB_compute_variable_group_size, where the
number of threads (and hence the total size of the push constants) is
not known at compile time, so it cannot be taken from
cs_prog_data->threads.

Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4504>

src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_state.c
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/vulkan/anv_cmd_buffer.c
src/mesa/drivers/dri/i965/gen6_constant_state.c
src/mesa/drivers/dri/i965/genX_state_upload.c

index b45f86623d6ce3e7c189a42c332ab4a746db3d69..3e8abcd428d16921314cd467a4be97e51329592b 100644 (file)
@@ -2022,7 +2022,7 @@ void
 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                                uint32_t *dst)
 {
-   assert(cs_prog_data->push.total.size > 0);
+   assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0);
    assert(cs_prog_data->push.cross_thread.size == 0);
    assert(cs_prog_data->push.per_thread.dwords == 1);
    assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
index 479ad087d77e786abcc0eb5c8e8491bf91f82335..5ea1cd8a2f13169a823eae3dcbe75e55170ae446 100644 (file)
@@ -6543,18 +6543,19 @@ iris_upload_compute_state(struct iris_context *ice,
       assert(cs_prog_data->push.cross_thread.dwords == 0 &&
              cs_prog_data->push.per_thread.dwords == 1 &&
              cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
+      const unsigned push_const_size =
+         brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
       uint32_t *curbe_data_map =
          stream_state(batch, ice->state.dynamic_uploader,
                       &ice->state.last_res.cs_thread_ids,
-                      ALIGN(cs_prog_data->push.total.size, 64), 64,
+                      ALIGN(push_const_size, 64), 64,
                       &curbe_data_offset);
       assert(curbe_data_map);
-      memset(curbe_data_map, 0x5a, ALIGN(cs_prog_data->push.total.size, 64));
+      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
       iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
 
       iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength =
-            ALIGN(cs_prog_data->push.total.size, 64);
+         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
          curbe.CURBEDataStartAddress = curbe_data_offset;
       }
    }
index 1364890beb41df15cfc87d4e7a764b3bcd35d64c..08999e950716e9d172b90077c75268ee60c3d803 100644 (file)
@@ -910,7 +910,6 @@ struct brw_cs_prog_data {
    struct {
       struct brw_push_const_block cross_thread;
       struct brw_push_const_block per_thread;
-      struct brw_push_const_block total;
    } push;
 
    struct {
@@ -1470,6 +1469,10 @@ encode_slm_size(unsigned gen, uint32_t bytes)
    return slm_size;
 }
 
+unsigned
+brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
+                             unsigned threads);
+
 /**
  * Return true if the given shader stage is dispatched contiguously by the
  * relevant fixed function starting from channel 0 of the SIMD thread, which
index 901a13e5bf55d3606e623c7e8ce68a8303309785..96fdb6b0992f1cfb34a230f471b0d937fc0b6c48 100644 (file)
@@ -8807,6 +8807,16 @@ fs_visitor::emit_cs_work_group_id_setup()
    return reg;
 }
 
+unsigned
+brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
+                             unsigned threads)
+{
+   assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0);
+   assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0);
+   return cs_prog_data->push.per_thread.size * threads +
+          cs_prog_data->push.cross_thread.size;
+}
+
 static void
 fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
 {
@@ -8845,11 +8855,6 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo,
    fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
    fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
 
-   unsigned total_dwords =
-      (cs_prog_data->push.per_thread.size * cs_prog_data->threads +
-       cs_prog_data->push.cross_thread.size) / 4;
-   fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
-
    assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
           cs_prog_data->push.per_thread.size == 0);
    assert(cs_prog_data->push.cross_thread.dwords +
index 0b8e04dd63379b454fc64a176e9fb188a0f61d0d..188aff6be741a8db291bef75678c4fcff56e949d 100644 (file)
@@ -834,13 +834,15 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
 
-   if (cs_prog_data->push.total.size == 0)
+   const unsigned total_push_constants_size =
+      brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+   if (total_push_constants_size == 0)
       return (struct anv_state) { .offset = 0 };
 
    const unsigned push_constant_alignment =
       cmd_buffer->device->info.gen < 8 ? 32 : 64;
    const unsigned aligned_total_push_constants_size =
-      ALIGN(cs_prog_data->push.total.size, push_constant_alignment);
+      ALIGN(total_push_constants_size, push_constant_alignment);
    struct anv_state state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                          aligned_total_push_constants_size,
index 919aee49ade0a957a84c366372f0c7f282fd1d18..50e34fc9c8f8664654cfccf4922daa85701b1909 100644 (file)
@@ -303,14 +303,16 @@ brw_upload_cs_push_constants(struct brw_context *brw,
    /* XXX: Should this happen somewhere before to get our state flag set? */
    _mesa_load_state_parameters(ctx, prog->Parameters);
 
-   if (cs_prog_data->push.total.size == 0) {
+   const unsigned push_const_size =
+      brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+   if (push_const_size == 0) {
       stage_state->push_const_size = 0;
       return;
    }
 
 
    uint32_t *param =
-      brw_state_batch(brw, ALIGN(cs_prog_data->push.total.size, 64),
+      brw_state_batch(brw, ALIGN(push_const_size, 64),
                       64, &stage_state->push_const_offset);
    assert(param);
 
index 898d5aa7a43343711fbe2624168936ed9edc1368..fed5eda8e483f4edc80df8667c55319b9c13bcf9 100644 (file)
@@ -4358,10 +4358,11 @@ genX(upload_cs_state)(struct brw_context *brw)
       vfe.CURBEAllocationSize = vfe_curbe_allocation;
    }
 
-   if (cs_prog_data->push.total.size > 0) {
+   const unsigned push_const_size =
+      brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+   if (push_const_size > 0) {
       brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength =
-            ALIGN(cs_prog_data->push.total.size, 64);
+         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
          curbe.CURBEDataStartAddress = stage_state->push_const_offset;
       }
    }