From b3fd098e7daa491637d66d03366b67c989937a1f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 12 Dec 2014 20:37:04 -0800 Subject: [PATCH] nir: Make bcsel a fully vector operation Previously, the condition was a scalar that applied to all components simultaneously. As of this commit, the condition is a vector and each component is switched separately. Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_variables.c | 1 + src/glsl/nir/nir_opcodes.h | 5 ++--- src/glsl/nir/nir_opt_peephole_select.c | 2 ++ src/glsl/nir/nir_to_ssa.c | 2 ++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 ++++++++--- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/glsl/nir/nir_lower_variables.c b/src/glsl/nir/nir_lower_variables.c index b8961f85ce8..64682056ddc 100644 --- a/src/glsl/nir/nir_lower_variables.c +++ b/src/glsl/nir/nir_lower_variables.c @@ -836,6 +836,7 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state) mov = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); mov->src[0].src = nir_src_copy(intrin->predicate, state->mem_ctx); + /* Splat the condition to all channels */ memset(mov->src[0].swizzle, 0, sizeof mov->src[0].swizzle); mov->src[1].src.is_ssa = true; diff --git a/src/glsl/nir/nir_opcodes.h b/src/glsl/nir/nir_opcodes.h index c8230b32df0..310c9d83b7c 100644 --- a/src/glsl/nir/nir_opcodes.h +++ b/src/glsl/nir/nir_opcodes.h @@ -325,9 +325,8 @@ TRIOP(flrp, nir_type_float) * bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). 
*/ -OPCODE(fcsel, 3, true, 0, nir_type_float, ARR(1, 0, 0), - ARR(nir_type_float, nir_type_float, nir_type_float)) -OPCODE(bcsel, 3, true, 0, nir_type_unsigned, ARR(1, 0, 0), +TRIOP(fcsel, nir_type_float) +OPCODE(bcsel, 3, true, 0, nir_type_unsigned, ARR(0, 0, 0), ARR(nir_type_bool, nir_type_unsigned, nir_type_unsigned)) TRIOP(bfi, nir_type_unsigned) diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c index 247fe050a70..3e8c93882d2 100644 --- a/src/glsl/nir/nir_opt_peephole_select.c +++ b/src/glsl/nir/nir_opt_peephole_select.c @@ -136,6 +136,8 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state) nir_phi_instr *phi = nir_instr_as_phi(instr); nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); sel->src[0].src = nir_src_copy(if_stmt->condition, state->mem_ctx); + /* Splat the condition to all channels */ + memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); assert(exec_list_length(&phi->srcs) == 2); foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { diff --git a/src/glsl/nir/nir_to_ssa.c b/src/glsl/nir/nir_to_ssa.c index 7fdae493887..6b6a0779a6d 100644 --- a/src/glsl/nir/nir_to_ssa.c +++ b/src/glsl/nir/nir_to_ssa.c @@ -235,6 +235,8 @@ rewrite_def_forwards(nir_dest *dest, void *_state) csel->dest.dest.reg.reg = dest->reg.reg; csel->dest.write_mask = (1 << dest->reg.reg->num_components) - 1; csel->src[0].src = nir_src_copy(*state->predicate, state->mem_ctx); + /* Splat the condition to all channels */ + memset(csel->src[0].swizzle, 0, sizeof csel->src[0].swizzle); csel->src[2].src.is_ssa = true; csel->src[2].src.ssa = get_ssa_src(dest->reg.reg, state); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9eece0170c1..266781bff3b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -965,9 +965,14 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_bcsel: - 
emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); - emit_percomp(BRW_OPCODE_SEL, result, op[1], op[2], - instr->dest.write_mask, false, BRW_PREDICATE_NORMAL); + for (unsigned i = 0; i < 4; i++) { + if (!((instr->dest.write_mask >> i) & 1)) + continue; + + emit(CMP(reg_null_d, offset(op[0], i), fs_reg(0), BRW_CONDITIONAL_NZ)); + emit(SEL(offset(result, i), offset(op[1], i), offset(op[2], i))) + ->predicate = BRW_PREDICATE_NORMAL; + } break; default: -- 2.30.2