From: Jason Ekstrand Date: Fri, 19 Oct 2018 16:14:47 +0000 (-0500) Subject: nir: Switch to using 1-bit Booleans for almost everything X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=44227453ec03f5462f1cff5760909a9dba95c61a;p=mesa.git nir: Switch to using 1-bit Booleans for almost everything This is a squash of a few distinct changes: glsl,spirv: Generate 1-bit Booleans Revert "Use 32-bit opcodes in the NIR producers and optimizations" Revert "nir/builder: Generate 32-bit bool opcodes transparently" nir/builder: Generate 1-bit Booleans in nir_build_imm_bool Reviewed-by: Eric Anholt Reviewed-by: Bas Nieuwenhuizen Tested-by: Bas Nieuwenhuizen --- diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 2ab03069672..c5ba47d9e30 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -261,7 +261,7 @@ constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - ret->values[0].u32[r] = ir->value.b[r] ? NIR_TRUE : NIR_FALSE; + ret->values[0].b[r] = ir->value.b[r]; break; @@ -1000,7 +1000,8 @@ nir_visitor::visit(ir_call *ir) assert(write_mask); nir_ssa_def *nir_val = evaluate_rvalue(val); - assert(!val->type->is_boolean() || nir_val->bit_size == 32); + if (val->type->is_boolean()) + nir_val = nir_b2i32(&b, nir_val); instr->src[0] = nir_src_for_ssa(nir_val); instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); @@ -1110,6 +1111,10 @@ nir_visitor::visit(ir_call *ir) type->vector_elements, bit_size, NULL); nir_builder_instr_insert(&b, &instr->instr); + + /* The value in shared memory is a 32-bit value */ + if (type->is_boolean()) + ret = nir_i2b(&b, &instr->dest.ssa); break; } case nir_intrinsic_store_shared: { @@ -1129,7 +1134,9 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); nir_ssa_def *nir_val = evaluate_rvalue(val); - assert(!val->type->is_boolean() || nir_val->bit_size == 32); + /* The value in shared memory is a 32-bit value */ + if (val->type->is_boolean()) + nir_val = nir_b2i32(&b, nir_val); instr->src[0] = nir_src_for_ssa(nir_val); instr->num_components = val->type->vector_elements; @@ -1187,7 +1194,7 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_vote_any: case nir_intrinsic_vote_all: case nir_intrinsic_vote_ieq: { - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); instr->num_components = 1; ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 24dee98ef51..3bf9621d1f8 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -828,7 +828,7 @@ nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) { switch (base_type) { case GLSL_TYPE_BOOL: - return nir_type_bool32; + return nir_type_bool1; break; case GLSL_TYPE_UINT: return nir_type_uint32; @@ -1568,18 +1568,18 @@ static inline bool nir_alu_instr_is_comparison(const nir_alu_instr *instr) { switch (instr->op) { - case nir_op_flt32: - case nir_op_fge32: - case nir_op_feq32: - case nir_op_fne32: - case nir_op_ilt32: - case nir_op_ult32: - case nir_op_ige32: - case nir_op_uge32: - case nir_op_ieq32: - case nir_op_ine32: - case nir_op_i2b32: - case nir_op_f2b32: + case nir_op_flt: + case nir_op_fge: + case nir_op_feq: + case nir_op_fne: + case nir_op_ilt: + case nir_op_ult: + case nir_op_ige: + case nir_op_uge: + case nir_op_ieq: + case nir_op_ine: + case nir_op_i2b1: + case nir_op_f2b1: case nir_op_inot: case nir_op_fnot: return true; diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index d8abb7fd027..826e549019a 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -212,9 +212,9 @@ nir_imm_bool(nir_builder *build, bool x) nir_const_value v; memset(&v, 0, sizeof(v)); - v.u32[0] = x ? NIR_TRUE : NIR_FALSE; + v.b[0] = x; - return nir_build_imm(build, 1, 32, v); + return nir_build_imm(build, 1, 1, v); } static inline nir_ssa_def * @@ -976,13 +976,13 @@ nir_load_param(nir_builder *build, uint32_t param_idx) static inline nir_ssa_def * nir_f2b(nir_builder *build, nir_ssa_def *f) { - return nir_f2b32(build, f); + return nir_f2b1(build, f); } static inline nir_ssa_def * nir_i2b(nir_builder *build, nir_ssa_def *i) { - return nir_i2b32(build, i); + return nir_i2b1(build, i); } static inline nir_ssa_def * diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py index 5c38818d4ec..34b8c4371e1 100644 --- a/src/compiler/nir/nir_builder_opcodes_h.py +++ b/src/compiler/nir/nir_builder_opcodes_h.py @@ -27,36 +27,6 @@ template = """\ #define _NIR_BUILDER_OPCODES_ <% -opcode_remap = { - 'flt' : 'flt32', - 'fge' : 'fge32', - 'feq' : 'feq32', - 'fne' : 'fne32', - 'ilt' : 'ilt32', - 'ige' : 'ige32', - 'ieq' : 'ieq32', - 'ine' : 'ine32', - 'ult' : 'ult32', - 'uge' : 'uge32', - - 'ball_iequal2' : 'b32all_iequal2', - 'ball_iequal3' : 'b32all_iequal3', - 'ball_iequal4' : 'b32all_iequal4', - 'bany_inequal2' : 'b32any_inequal2', - 'bany_inequal3' : 'b32any_inequal3', - 'bany_inequal4' : 'b32any_inequal4', - 'ball_fequal2' : 'b32all_fequal2', - 'ball_fequal3' : 'b32all_fequal3', - 'ball_fequal4' : 'b32all_fequal4', - 'bany_fnequal2' : 'b32any_fnequal2', - 'bany_fnequal3' : 'b32any_fnequal3', - 'bany_fnequal4' : 'b32any_fnequal4', - - 'bcsel' : 'b32csel', -} - -opcode_remap32 = { op32 : op for op, op32 in opcode_remap.items() } - def src_decl_list(num_srcs): return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs)) @@ -65,15 +35,8 @@ def src_list(num_srcs): %> % for name, opcode in sorted(opcodes.items()): - % if name in opcode_remap: - <% continue %> - % elif name in opcode_remap32: - <% builder_name = opcode_remap32[name] %> - % else: - <% builder_name = name %> - % endif static inline nir_ssa_def * -nir_${builder_name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)}) +nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)}) { return nir_build_alu(build, nir_op_${name}, ${src_list(opcode.num_inputs)}); } diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index a82a5113b5e..259f02a854e 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -433,26 +433,26 @@ get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step, int32_t iter; switch (cond_op) { - case nir_op_ige32: - case nir_op_ilt32: - case nir_op_ieq32: - case nir_op_ine32: { + case nir_op_ige: + case nir_op_ilt: + case nir_op_ieq: + case nir_op_ine: { int32_t initial_val = initial->i32[0]; int32_t span = limit->i32[0] - initial_val; iter = span / step->i32[0]; break; } - case nir_op_uge32: - case nir_op_ult32: { + case nir_op_uge: + case nir_op_ult: { uint32_t initial_val = initial->u32[0]; uint32_t span = limit->u32[0] - initial_val; iter = span / step->u32[0]; break; } - case nir_op_fge32: - case nir_op_flt32: - case nir_op_feq32: - case nir_op_fne32: { + case nir_op_fge: + case nir_op_flt: + case nir_op_feq: + case nir_op_fne: { float initial_val = initial->f32[0]; float span = limit->f32[0] - initial_val; iter = span / step->f32[0]; @@ -623,10 +623,10 @@ find_trip_count(loop_info_state *state) bool limit_rhs = true; switch (alu->op) { - case nir_op_fge32: case nir_op_ige32: case nir_op_uge32: - case nir_op_flt32: case nir_op_ilt32: case nir_op_ult32: - case nir_op_feq32: case nir_op_ieq32: - case nir_op_fne32: case nir_op_ine32: + case nir_op_fge: case nir_op_ige: case nir_op_uge: + case nir_op_flt: case nir_op_ilt: case nir_op_ult: + case nir_op_feq: case nir_op_ieq: + case nir_op_fne: case nir_op_ine: /* We assume that the limit is the "right" operand */ basic_ind = get_loop_var(alu->src[0].src.ssa, state); diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c index f95a62e780c..437dd5fb745 100644 --- a/src/compiler/nir/nir_lower_two_sided_color.c +++ b/src/compiler/nir/nir_lower_two_sided_color.c @@ -158,7 +158,11 @@ nir_lower_two_sided_color_block(nir_block *block, * bcsel(load_system_value(FACE), load_input(COLn), load_input(BFCn)) */ b->cursor = nir_before_instr(&intr->instr); - nir_ssa_def *face = nir_load_front_face(b); + nir_ssa_def *face = nir_load_front_face(b); + /* gl_FrontFace is a boolean but the intrinsic constructor creates + * 32-bit value by default. + */ + face->bit_size = 1; nir_ssa_def *front = load_input(b, state->colors[idx].front); nir_ssa_def *back = load_input(b, state->colors[idx].back); nir_ssa_def *color = nir_bcsel(b, face, front, back); diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index f5951271e0d..c21ac9219f0 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -609,7 +609,7 @@ can_propagate_through_alu(nir_src *src) case nir_op_inot: case nir_op_b2i32: return true; - case nir_op_b32csel: + case nir_op_bcsel: return src == &alu->src[0].src; default: return false; diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 6308c8cab12..ad9d0abec03 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, break; nir_phi_instr *phi = nir_instr_as_phi(instr); - nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_b32csel); + nir_alu_instr *sel = nir_alu_instr_create(shader, nir_op_bcsel); nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel); /* Splat the condition to all channels */ memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c index 52c1d257e9f..c26158dab7e 100644 --- a/src/compiler/nir/nir_opt_undef.c +++ b/src/compiler/nir/nir_opt_undef.c @@ -38,7 +38,7 @@ static bool opt_undef_csel(nir_alu_instr *instr) { - if (instr->op != nir_op_b32csel && instr->op != nir_op_fcsel) + if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) return false; assert(instr->dest.dest.is_ssa); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 493f44136f4..47932e99639 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -298,6 +298,16 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state unsigned i, j; switch (glsl_get_base_type(type)) { + case GLSL_TYPE_BOOL: + /* Only float base types can be matrices. */ + assert(cols == 1); + + for (i = 0; i < rows; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%s", c->values[0].b[i] ? "true" : "false"); + } + break; + case GLSL_TYPE_UINT8: case GLSL_TYPE_INT8: /* Only float base types can be matrices. */ @@ -322,7 +332,6 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state case GLSL_TYPE_UINT: case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: /* Only float base types can be matrices. */ assert(cols == 1); diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 70d593b96ab..fe34cb95837 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -95,9 +95,11 @@ static inline unsigned glsl_get_bit_size(const struct glsl_type *type) { switch (glsl_get_base_type(type)) { + case GLSL_TYPE_BOOL: + return 1; + case GLSL_TYPE_INT: case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: case GLSL_TYPE_FLOAT: /* TODO handle mediump */ case GLSL_TYPE_SUBROUTINE: return 32; diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 8c9dc7d19eb..7dc6bc914d6 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1468,7 +1468,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, opcode == SpvOpSpecConstantFalse) int_val = get_specialization(b, val, int_val); - val->constant->values[0].u32[0] = int_val ? NIR_TRUE : NIR_FALSE; + val->constant->values[0].b[0] = int_val != 0; break; } diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index b04ada92199..dc6fedc9129 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -244,15 +244,15 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, case SpvOpShiftRightArithmetic: return nir_op_ishr; case SpvOpShiftLeftLogical: return nir_op_ishl; case SpvOpLogicalOr: return nir_op_ior; - case SpvOpLogicalEqual: return nir_op_ieq32; - case SpvOpLogicalNotEqual: return nir_op_ine32; + case SpvOpLogicalEqual: return nir_op_ieq; + case SpvOpLogicalNotEqual: return nir_op_ine; case SpvOpLogicalAnd: return nir_op_iand; case SpvOpLogicalNot: return nir_op_inot; case SpvOpBitwiseOr: return nir_op_ior; case SpvOpBitwiseXor: return nir_op_ixor; case SpvOpBitwiseAnd: return nir_op_iand; - case SpvOpSelect: return nir_op_b32csel; - case SpvOpIEqual: return nir_op_ieq32; + case SpvOpSelect: return nir_op_bcsel; + case SpvOpIEqual: return nir_op_ieq; case SpvOpBitFieldInsert: return nir_op_bitfield_insert; case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; @@ -264,27 +264,27 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, * the logical operator to use since they also need to check if operands are * ordered. */ - case SpvOpFOrdEqual: return nir_op_feq32; - case SpvOpFUnordEqual: return nir_op_feq32; - case SpvOpINotEqual: return nir_op_ine32; - case SpvOpFOrdNotEqual: return nir_op_fne32; - case SpvOpFUnordNotEqual: return nir_op_fne32; - case SpvOpULessThan: return nir_op_ult32; - case SpvOpSLessThan: return nir_op_ilt32; - case SpvOpFOrdLessThan: return nir_op_flt32; - case SpvOpFUnordLessThan: return nir_op_flt32; - case SpvOpUGreaterThan: *swap = true; return nir_op_ult32; - case SpvOpSGreaterThan: *swap = true; return nir_op_ilt32; - case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt32; - case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt32; - case SpvOpULessThanEqual: *swap = true; return nir_op_uge32; - case SpvOpSLessThanEqual: *swap = true; return nir_op_ige32; - case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge32; - case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge32; - case SpvOpUGreaterThanEqual: return nir_op_uge32; - case SpvOpSGreaterThanEqual: return nir_op_ige32; - case SpvOpFOrdGreaterThanEqual: return nir_op_fge32; - case SpvOpFUnordGreaterThanEqual: return nir_op_fge32; + case SpvOpFOrdEqual: return nir_op_feq; + case SpvOpFUnordEqual: return nir_op_feq; + case SpvOpINotEqual: return nir_op_ine; + case SpvOpFOrdNotEqual: return nir_op_fne; + case SpvOpFUnordNotEqual: return nir_op_fne; + case SpvOpULessThan: return nir_op_ult; + case SpvOpSLessThan: return nir_op_ilt; + case SpvOpFOrdLessThan: return nir_op_flt; + case SpvOpFUnordLessThan: return nir_op_flt; + case SpvOpUGreaterThan: *swap = true; return nir_op_ult; + case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; + case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; + case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; + case SpvOpULessThanEqual: *swap = true; return nir_op_uge; + case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; + case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; + case SpvOpUGreaterThanEqual: return nir_op_uge; + case SpvOpSGreaterThanEqual: return nir_op_ige; + case SpvOpFOrdGreaterThanEqual: return nir_op_fge; + case SpvOpFUnordGreaterThanEqual: return nir_op_fge; /* Conversions: */ case SpvOpQuantizeToF16: return nir_op_fquantize2f16; @@ -413,9 +413,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, } else { nir_op op; switch (src[0]->num_components) { - case 2: op = nir_op_b32any_inequal2; break; - case 3: op = nir_op_b32any_inequal3; break; - case 4: op = nir_op_b32any_inequal4; break; + case 2: op = nir_op_bany_inequal2; break; + case 3: op = nir_op_bany_inequal3; break; + case 4: op = nir_op_bany_inequal4; break; default: vtn_fail("invalid number of components"); } val->ssa->def = nir_build_alu(&b->nb, op, src[0], @@ -430,9 +430,9 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, } else { nir_op op; switch (src[0]->num_components) { - case 2: op = nir_op_b32all_iequal2; break; - case 3: op = nir_op_b32all_iequal3; break; - case 4: op = nir_op_b32all_iequal4; break; + case 2: op = nir_op_ball_iequal2; break; + case 3: op = nir_op_ball_iequal3; break; + case 4: op = nir_op_ball_iequal4; break; default: vtn_fail("invalid number of components"); } val->ssa->def = nir_build_alu(&b->nb, op, src[0],