From 2126c68e5cba79709e228f12eb3062a9be634a0e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 19 May 2015 16:57:43 -0700 Subject: [PATCH] nir: Get rid of the array elements parameter on load/store intrinsics Previously, we used intrinsic->const_index[1] to represent "the number of array elements to load" for load/store intrinsics. However, this set to 1 by every pass that ever creates a load/store intrinsic. Also, while it might make some sense for registers, it makes no sense whatsoever in SSA. On top of that, the i965 backend was the only backend to ever support it; freedreno and vc4 just assert that it's always 1. Let's just delete it. Signed-off-by: Jason Ekstrand Reviewed-by: Connor Abbott Reviewed-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 - .../drivers/freedreno/ir3/ir3_compiler_nir.c | 5 -- src/gallium/drivers/vc4/vc4_program.c | 6 -- src/glsl/nir/nir_intrinsics.h | 19 +++---- src/glsl/nir/nir_lower_io.c | 2 - src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 56 +++++++++---------- 6 files changed, 34 insertions(+), 56 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 50ce3dc366e..1702b41393b 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -401,7 +401,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->num_components = 4; load->const_index[0] = index; - load->const_index[1] = 1; if (dim) { if (dimind) { load->src[srcn] = @@ -1671,7 +1670,6 @@ ttn_add_output_stores(struct ttn_compile *c) nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); store->num_components = 4; store->const_index[0] = var->data.driver_location + i; - store->const_index[1] = 1; store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg; nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 05e7049ad55..2cf25ea6e0a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1158,14 +1158,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: - compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = create_uniform(ctx, n); } break; case nir_intrinsic_load_uniform_indirect: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; @@ -1178,14 +1176,12 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: - compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = b->inputs[n]; } break; case nir_intrinsic_load_input_indirect: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); struct ir3_instruction *collect = create_collect(b, b->inputs, b->ninputs); @@ -1202,7 +1198,6 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinisic_store_var(ctx, intr); break; case nir_intrinsic_store_output: - compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bf156f9b42d..d84e5f25616 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1849,8 +1849,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) { dest[i] = qir_uniform(c, QUNIFORM_UNIFORM, instr->const_index[0] * 4 + i); @@ -1858,8 +1856,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_uniform_indirect: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) { dest[i] = indirect_uniform_load(c, ntq_get_src(c, instr->src[0], 0), @@ -1870,8 +1866,6 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_input: - assert(instr->const_index[1] == 1); - for (int i = 0; i < instr->num_components; i++) dest[i] = c->inputs[instr->const_index[0] * 4 + i]; diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 10192c5315c..b516830be95 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -138,12 +138,11 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The first index is the address to load from, and the second index is the - * number of array elements to load. Indirect loads have an additional - * register input, which is added to the constant address to compute the - * final address to load from. For UBO's (and SSBO's), the first source is - * the (possibly constant) UBO buffer index and the indirect (if it exists) - * is the second source. + * The first and only index is the base address to load from. Indirect + * loads have an additional register input, which is added to the constant + * address to compute the final address to load from. For UBO's (and + * SSBO's), the first source is the (possibly constant) UBO buffer index + * and the indirect (if it exists) is the second source. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -152,9 +151,9 @@ SYSTEM_VALUE(invocation_id, 1) */ #define LOAD(name, extra_srcs, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 2, flags) + true, 0, 0, 1, flags) LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -172,7 +171,7 @@ LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ num_indices, flags) \ -STORE(output, 2, 0) -/* STORE(ssbo, 3, 0) */ +STORE(output, 1, 0) +/* STORE(ssbo, 2, 0) */ LAST_INTRINSIC(store_output_indirect) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 03eed04e1e9..6761d5bad33 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -288,7 +288,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; load->const_index[0] = offset; - load->const_index[1] = 1; if (has_indirect) load->src[0] = indirect; @@ -331,7 +330,6 @@ nir_lower_io_block(nir_block *block, void *void_state) offset += intrin->variables[0]->var->data.driver_location; store->const_index[0] = offset; - store->const_index[1] = 1; nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 5dd8363b91e..56a2278a2dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1345,16 +1345,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) index -= num_direct_uniforms; } - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; - - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(uniform_reg, dest.type), index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; + + emit(MOV(dest, src)); + dest = offset(dest, 1); } break; } @@ -1426,17 +1424,15 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* fallthrough */ case nir_intrinsic_load_input: { unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; - - emit(MOV(dest, src)); - dest = offset(dest, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg src = offset(retype(nir_inputs, dest.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + index++; + + emit(MOV(dest, src)); + dest = offset(dest, 1); } break; } @@ -1564,16 +1560,14 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); unsigned index = 0; - for (int i = 0; i < instr->const_index[1]; i++) { - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg new_dest = offset(retype(nir_outputs, src.type), - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); - index++; - emit(MOV(new_dest, src)); - src = offset(src, 1); - } + for (unsigned j = 0; j < instr->num_components; j++) { + fs_reg new_dest = offset(retype(nir_outputs, src.type), + instr->const_index[0] + index); + if (has_indirect) + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); + index++; + emit(MOV(new_dest, src)); + src = offset(src, 1); } break; } -- 2.30.2