From b94b57f7c540e55d25e1cfd7392fc030022a4933 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 10 Oct 2014 08:33:23 -0700 Subject: [PATCH] i965/cs: Initialize gl_LocalInvocationID in push constant data MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Jordan Justen Reviewed-by: Kristian Høgsberg --- src/mesa/drivers/dri/i965/brw_cs.cpp | 56 ++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 757c77d459a..d273f99c5ca 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -324,6 +324,7 @@ brw_upload_cs_state(struct brw_context *brw) uint32_t offset; uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 64, &offset); + struct gl_program *prog = (struct gl_program *) brw->compute_program; struct brw_stage_state *stage_state = &brw->cs.base; struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data; struct brw_stage_prog_data *prog_data = &cs_prog_data->base; @@ -340,8 +341,15 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned local_id_dwords = 0; + + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { + local_id_dwords = + brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size); + } + unsigned push_constant_data_size = - prog_data->nr_params * sizeof(gl_constant_value); + (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); unsigned push_constant_regs = reg_aligned_constant_size / 32; unsigned threads = get_cs_thread_count(cs_prog_data); @@ -465,6 +473,9 @@ const struct brw_tracked_state brw_cs_state = { * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 * registers worth of push constant space. * + * Note: Any updates to brw_cs_prog_local_id_payload_dwords or + * fill_local_id_payload need to coordinated. + * * FINISHME: There are a few easy optimizations to consider. * * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is @@ -486,6 +497,31 @@ brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog, } +static void +fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, + void *buffer, unsigned *x, unsigned *y, unsigned *z) +{ + uint32_t *param = (uint32_t *)buffer; + for (unsigned i = 0; i < cs_prog_data->simd_size; i++) { + param[0 * cs_prog_data->simd_size + i] = *x; + param[1 * cs_prog_data->simd_size + i] = *y; + param[2 * cs_prog_data->simd_size + i] = *z; + + (*x)++; + if (*x == cs_prog_data->local_size[0]) { + *x = 0; + (*y)++; + if (*y == cs_prog_data->local_size[1]) { + *y = 0; + (*z)++; + if (*z == cs_prog_data->local_size[2]) + *z = 0; + } + } + } +} + + /** * Creates a region containing the push constants for the CS on gen7+. * @@ -506,6 +542,12 @@ brw_upload_cs_push_constants(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; const struct brw_stage_prog_data *prog_data = (brw_stage_prog_data*) cs_prog_data; + unsigned local_id_dwords = 0; + + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { + local_id_dwords = + brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size); + } /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. @@ -513,14 +555,14 @@ brw_upload_cs_push_constants(struct brw_context *brw, /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, prog->Parameters); - if (prog_data->nr_params == 0) { + if (prog_data->nr_params == 0 && local_id_dwords == 0) { stage_state->push_const_size = 0; } else { gl_constant_value *param; unsigned i, t; const unsigned push_constant_data_size = - prog_data->nr_params * sizeof(gl_constant_value); + (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value); const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); const unsigned param_aligned_count = reg_aligned_constant_size / sizeof(*param); @@ -536,9 +578,15 @@ brw_upload_cs_push_constants(struct brw_context *brw, STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); /* _NEW_PROGRAM_CONSTANTS */ + unsigned x = 0, y = 0, z = 0; for (t = 0; t < threads; t++) { + gl_constant_value *next_param = ¶m[t * param_aligned_count]; + if (local_id_dwords > 0) { + fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z); + next_param += local_id_dwords; + } for (i = 0; i < prog_data->nr_params; i++) { - param[t * param_aligned_count + i] = *prog_data->param[i]; + next_param[i] = *prog_data->param[i]; } } -- 2.30.2