payload.num_regs = 2;
}
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to fs_visitor::setup_cs_payload,
+ * brw_cs_fill_local_id_payload or
+ * fs_visitor::emit_cs_local_invocation_id_setup need to be coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
payload.num_regs = 1;
if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(dispatch_width);
- assert((local_id_dwords & 0x7) == 0);
- const unsigned local_id_regs = local_id_dwords / 8;
+ /* Three dwords (x, y, z) per channel, and eight dwords per register. */
+ prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
payload.local_invocation_id_reg = payload.num_regs;
- payload.num_regs += local_id_regs;
+ payload.num_regs += prog_data->local_invocation_id_regs;
}
}
return g.get_assembly(final_assembly_size);
}
+/**
+ * Writes the per-channel gl_LocalInvocationID values for every thread into
+ * the push constant buffer.
+ *
+ * Does nothing when prog_data->local_invocation_id_regs is zero. Otherwise,
+ * for each thread, three consecutive runs of prog_data->simd_size dwords are
+ * written: all x values, then all y values, then all z values. The ID
+ * counters are not reset between threads, so each thread continues where the
+ * previous one stopped.
+ *
+ * \param prog_data  Supplies simd_size, local_size[] and
+ *                   local_invocation_id_regs.
+ * \param buffer     Start of the destination; the data for thread t begins
+ *                   at byte offset stride * t.
+ * \param threads    Number of threads to generate local IDs for.
+ * \param stride     Distance in bytes between the data of consecutive
+ *                   threads; must be a multiple of 32.
+ */
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* 'stride' should be an integer number of registers, that is, a multiple
+ * of 32 bytes.
+ */
+ assert(stride % 32 == 0);
+
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ /* 'stride' is in bytes, while 'param' is indexed in dwords. */
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ /* Step to the next invocation: x varies fastest, then y, then z,
+ * each wrapping to zero at its local_size[] bound.
+ */
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}
+
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
};
-/**
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- * no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- * easily use 16-bit words rather than 32-bit dwords in the push constant
- * data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- * conveying the data, and thereby reduce push constant usage.
- *
- */
-unsigned
-brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
-{
- return 3 * dispatch_width;
-}
-
-
-static void
-fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
- void *buffer, unsigned *x, unsigned *y, unsigned *z)
-{
- uint32_t *param = (uint32_t *)buffer;
- for (unsigned i = 0; i < cs_prog_data->simd_size; i++) {
- param[0 * cs_prog_data->simd_size + i] = *x;
- param[1 * cs_prog_data->simd_size + i] = *y;
- param[2 * cs_prog_data->simd_size + i] = *z;
-
- (*x)++;
- if (*x == cs_prog_data->local_size[0]) {
- *x = 0;
- (*y)++;
- if (*y == cs_prog_data->local_size[1]) {
- *y = 0;
- (*z)++;
- if (*z == cs_prog_data->local_size[2])
- *z = 0;
- }
- }
- }
-}
-
-
/**
* Creates a region containing the push constants for the CS on gen7+.
*
(struct brw_stage_prog_data*) cs_prog_data;
unsigned local_id_dwords = 0;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
- }
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
+ local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
+ brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
+ reg_aligned_constant_size);
+
/* _NEW_PROGRAM_CONSTANTS */
- unsigned x = 0, y = 0, z = 0;
for (t = 0; t < threads; t++) {
- gl_constant_value *next_param = ¶m[t * param_aligned_count];
- if (local_id_dwords > 0) {
- fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, &z);
- next_param += local_id_dwords;
- }
+ gl_constant_value *next_param =
+ ¶m[t * param_aligned_count + local_id_dwords];
for (i = 0; i < prog_data->nr_params; i++) {
next_param[i] = *prog_data->param[i];
}