From: Caio Marcelo de Oliveira Filho Date: Sat, 21 Mar 2020 04:02:06 +0000 (-0700) Subject: intel/compiler: Replace cs_prog_data->push.total with a helper X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c54fc0d07b1a92e065000c1301971b93439595e2;p=mesa.git intel/compiler: Replace cs_prog_data->push.total with a helper The push.total field had three values but only one was directly used (size). Replace it with a helper function that explicitly takes the cs_prog_data and the number of threads -- and use that in the drivers. This is a preparation for ARB_compute_variable_group_size where the number of threads (hence the total size for push constants) is not defined at compile time (not cs_prog_data->threads). Reviewed-by: Paulo Zanoni Reviewed-by: Jordan Justen Part-of: --- diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index b45f86623d6..3e8abcd428d 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2022,7 +2022,7 @@ void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, uint32_t *dst) { - assert(cs_prog_data->push.total.size > 0); + assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0); assert(cs_prog_data->push.cross_thread.size == 0); assert(cs_prog_data->push.per_thread.dwords == 1); assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 479ad087d77..5ea1cd8a2f1 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -6543,18 +6543,19 @@ iris_upload_compute_state(struct iris_context *ice, assert(cs_prog_data->push.cross_thread.dwords == 0 && cs_prog_data->push.per_thread.dwords == 1 && cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); + const unsigned push_const_size = + brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads); uint32_t *curbe_data_map = stream_state(batch, ice->state.dynamic_uploader, &ice->state.last_res.cs_thread_ids, - ALIGN(cs_prog_data->push.total.size, 64), 64, + ALIGN(push_const_size, 64), 64, &curbe_data_offset); assert(curbe_data_map); - memset(curbe_data_map, 0x5a, ALIGN(cs_prog_data->push.total.size, 64)); + memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64)); iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map); iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = - ALIGN(cs_prog_data->push.total.size, 64); + curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); curbe.CURBEDataStartAddress = curbe_data_offset; } } diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 1364890beb4..08999e95071 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -910,7 +910,6 @@ struct brw_cs_prog_data { struct { struct brw_push_const_block cross_thread; struct brw_push_const_block per_thread; - struct brw_push_const_block total; } push; struct { @@ -1470,6 +1469,10 @@ encode_slm_size(unsigned gen, uint32_t bytes) return slm_size; } +unsigned +brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data, + unsigned threads); + /** * Return true if the given shader stage is dispatched contiguously by the * relevant fixed function starting from channel 0 of the SIMD thread, which diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 901a13e5bf5..96fdb6b0992 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -8807,6 +8807,16 @@ fs_visitor::emit_cs_work_group_id_setup() return reg; } +unsigned +brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data, + unsigned threads) +{ + assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0); + assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0); + return cs_prog_data->push.per_thread.size * threads + + cs_prog_data->push.cross_thread.size; +} + static void fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords) { @@ -8845,11 +8855,6 @@ cs_fill_push_const_info(const struct gen_device_info *devinfo, fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords); fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords); - unsigned total_dwords = - (cs_prog_data->push.per_thread.size * cs_prog_data->threads + - cs_prog_data->push.cross_thread.size) / 4; - fill_push_const_block_info(&cs_prog_data->push.total, total_dwords); - assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 || cs_prog_data->push.per_thread.size == 0); assert(cs_prog_data->push.cross_thread.dwords + diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 0b8e04dd633..188aff6be74 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -834,13 +834,15 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0]; - if (cs_prog_data->push.total.size == 0) + const unsigned total_push_constants_size = + brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads); + if (total_push_constants_size == 0) return (struct anv_state) { .offset = 0 }; const unsigned push_constant_alignment = cmd_buffer->device->info.gen < 8 ? 32 : 64; const unsigned aligned_total_push_constants_size = - ALIGN(cs_prog_data->push.total.size, push_constant_alignment); + ALIGN(total_push_constants_size, push_constant_alignment); struct anv_state state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, aligned_total_push_constants_size, diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index 919aee49ade..50e34fc9c8f 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -303,14 +303,16 @@ brw_upload_cs_push_constants(struct brw_context *brw, /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, prog->Parameters); - if (cs_prog_data->push.total.size == 0) { + const unsigned push_const_size = + brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads); + if (push_const_size == 0) { stage_state->push_const_size = 0; return; } uint32_t *param = - brw_state_batch(brw, ALIGN(cs_prog_data->push.total.size, 64), + brw_state_batch(brw, ALIGN(push_const_size, 64), 64, &stage_state->push_const_offset); assert(param); diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 898d5aa7a43..fed5eda8e48 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -4358,10 +4358,11 @@ genX(upload_cs_state)(struct brw_context *brw) vfe.CURBEAllocationSize = vfe_curbe_allocation; } - if (cs_prog_data->push.total.size > 0) { + const unsigned push_const_size = + brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads); + if (push_const_size > 0) { brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = - ALIGN(cs_prog_data->push.total.size, 64); + curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); curbe.CURBEDataStartAddress = stage_state->push_const_offset; } }