From 01d913cf90d3c57ba82f555bdf4ccb8ef7f9801e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2018 15:47:18 -0800 Subject: [PATCH] v3d: Add support for CS workgroup/invocation id intrinsics. We get a payload for the ivec3 workgroup and an int local invocation index, and we use the core lowering to turn them into the global invocation id and the local invocation id ivec3s. --- src/broadcom/compiler/nir_to_vir.c | 54 ++++++++++++++++++- src/broadcom/compiler/v3d_compiler.h | 8 +++ src/broadcom/compiler/vir.c | 1 + src/broadcom/compiler/vir_dump.c | 4 ++ src/broadcom/compiler/vir_register_allocate.c | 1 + 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index f10ed5975c1..c1889a7d645 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1899,6 +1899,32 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) */ break; + case nir_intrinsic_load_num_work_groups: + for (int i = 0; i < 3; i++) { + ntq_store_dest(c, &instr->dest, i, + vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS, + i)); + } + break; + + case nir_intrinsic_load_local_invocation_index: + ntq_store_dest(c, &instr->dest, 0, + vir_SHR(c, c->cs_payload[1], + vir_uniform_ui(c, 32 - c->local_invocation_index_bits))); + break; + + case nir_intrinsic_load_work_group_id: + ntq_store_dest(c, &instr->dest, 0, + vir_AND(c, c->cs_payload[0], + vir_uniform_ui(c, 0xffff))); + ntq_store_dest(c, &instr->dest, 1, + vir_SHR(c, c->cs_payload[0], + vir_uniform_ui(c, 16))); + ntq_store_dest(c, &instr->dest, 2, + vir_AND(c, c->cs_payload[1], + vir_uniform_ui(c, 0xffff))); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); @@ -2255,7 +2281,8 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl) static void nir_to_vir(struct v3d_compile *c) { - if (c->s->info.stage == MESA_SHADER_FRAGMENT) { + switch (c->s->info.stage) { + case 
MESA_SHADER_FRAGMENT: c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0)); c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1)); c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2)); @@ -2270,6 +2297,30 @@ nir_to_vir(struct v3d_compile *c) } else if (c->fs_key->is_lines) { c->line_x = emit_fragment_varying(c, NULL, 0, 0); } + break; + case MESA_SHADER_COMPUTE: + if (c->s->info.system_values_read & + (1ull << SYSTEM_VALUE_WORK_GROUP_ID)) { + c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0)); + } + if (c->s->info.system_values_read & + ((1ull << SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) | + (1ull << SYSTEM_VALUE_WORK_GROUP_ID))) { + c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); + } + + /* Set up the division between gl_LocalInvocationIndex and + * wg_in_mem in the payload reg. + */ + int wg_size = (c->s->info.cs.local_size[0] * + c->s->info.cs.local_size[1] * + c->s->info.cs.local_size[2]); + c->local_invocation_index_bits = + ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1; + assert(c->local_invocation_index_bits <= 8); + break; + default: + break; } if (c->s->info.stage == MESA_SHADER_FRAGMENT) @@ -2298,6 +2349,7 @@ const nir_shader_compiler_options v3d_nir_options = { .lower_bitfield_extract_to_shifts = true, .lower_bitfield_reverse = true, .lower_bit_count = true, + .lower_cs_local_id_from_index = true, .lower_pack_unorm_2x16 = true, .lower_pack_snorm_2x16 = true, .lower_pack_unorm_4x8 = true, diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index a5f99d3dae8..c61e0c95156 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -259,6 +259,11 @@ enum quniform_contents { QUNIFORM_ALPHA_REF, + /* Number of workgroups passed to glDispatchCompute in the dimension + * selected by the data value. + */ + QUNIFORM_NUM_WORK_GROUPS, + /** * Returns the the offset of the scratch buffer for register spilling. */ @@ -540,6 +545,9 @@ struct v3d_compile { /* Fragment shader payload regs. 
*/ struct qreg payload_w, payload_w_centroid, payload_z; + struct qreg cs_payload[2]; + int local_invocation_index_bits; + uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; uint32_t num_vpm_writes; diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 55a02123322..10105fbd861 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -639,6 +639,7 @@ v3d_lower_nir(struct v3d_compile *c) } NIR_PASS_V(c->s, nir_lower_tex, &tex_options); + NIR_PASS_V(c->s, nir_lower_system_values); } static void diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index 028e2b36c4b..0ec3070dedc 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -108,6 +108,10 @@ vir_dump_uniform(enum quniform_contents contents, fprintf(stderr, "ssbo_size[%d]", data); break; + case QUNIFORM_NUM_WORK_GROUPS: + fprintf(stderr, "num_wg.%c", data < 3 ? "xyz"[data] : '?'); + break; + default: if (quniform_contents_is_texture_p0(contents)) { fprintf(stderr, "tex[%d].p0: 0x%08x", diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index accc07a3a33..79ab5acd764 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -482,6 +482,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled) case 0: case 1: case 2: + case 3: /* Payload setup instructions: Force allocate * the dst to the given register (so the MOV * will disappear). -- 2.30.2