From 2881b123d00562fee8b7d2b4f7825f89a73e0d9f Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 2 Dec 2014 12:28:13 -0800 Subject: [PATCH] i965: Use ~0 to represent true on all generations. Jason realized that we could fix the result of the CMP instruction on Gen <= 5 by doing -(result & 1). Also do the resolves in the vec4 backend before use, rather than when the bool was created. The FS does this and it saves some unnecessary resolves. On Ironlake: total instructions in shared programs: 4289762 -> 4287277 (-0.06%) instructions in affected programs: 619430 -> 616945 (-0.40%) Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_context.c | 14 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 76 ++++++----- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + .../drivers/dri/i965/brw_vec4_visitor.cpp | 124 +++++++++++------- 5 files changed, 120 insertions(+), 102 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 5830b6e442d..ee9684bf945 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -516,18 +516,10 @@ brw_initialize_context_constants(struct brw_context *brw) * contains meaning [sic] data, software should make sure all higher bits * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." * - * We select the representation of a true boolean uniform to match what the - * CMP instruction returns. - * - * The Sandybridge BSpec's description of the CMP instruction matches that - * of the Ivybridge PRM. (The description in the Sandybridge PRM is seems - * to have not been updated from Ironlake). Its CMP instruction behaves like - * Ivybridge and newer. + * We select the representation of a true boolean uniform to be ~0, and fix + * the results of Gen <= 5 CMP instruction's with -(result & 1). */ - if (brw->gen >= 6) - ctx->Const.UniformBooleanTrue = ~0; - else - ctx->Const.UniformBooleanTrue = 1; + ctx->Const.UniformBooleanTrue = ~0; /* From the gen4 PRM, volume 4 page 127: * diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c6cd73b3bfd..37857e91042 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1399,15 +1399,12 @@ fs_visitor::emit_frontfacing_interpolation() * instruction only operates on UD (or D with an abs source modifier) * sources without negation. * - * Instead, use ASR (which will give ~0/true or 0/false) followed by an - * AND 1. + * Instead, use ASR (which will give ~0/true or 0/false). */ - fs_reg asr = fs_reg(this, glsl_type::bool_type); fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D)); g1_6.negate = true; - emit(ASR(asr, g1_6, fs_reg(31))); - emit(AND(*reg, asr, fs_reg(1))); + emit(ASR(*reg, g1_6, fs_reg(31))); } return reg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 13673ca8d9b..e4296ce9787 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -535,11 +535,7 @@ fs_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(NOT(this->result, op[0])); - } else { - emit(XOR(this->result, op[0], fs_reg(1))); - } + emit(NOT(this->result, op[0])); break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -745,7 +741,7 @@ fs_visitor::visit(ir_expression *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { + if (brw->gen <= 5) { resolve_bool_comparison(ir->operands[0], &op[0]); resolve_bool_comparison(ir->operands[1], &op[1]); } @@ -819,16 +815,13 @@ fs_visitor::visit(ir_expression *ir) emit(AND(this->result, op[0], fs_reg(1))); break; case ir_unop_b2f: - if (ctx->Const.UniformBooleanTrue != 1) { - op[0].type = BRW_REGISTER_TYPE_D; - this->result.type = BRW_REGISTER_TYPE_D; - emit(AND(this->result, op[0], fs_reg(0x3f800000u))); - this->result.type = BRW_REGISTER_TYPE_F; - } else { - temp = fs_reg(this, glsl_type::int_type); - emit(AND(temp, op[0], fs_reg(1))); - emit(MOV(this->result, temp)); + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); } + op[0].type = BRW_REGISTER_TYPE_D; + this->result.type = BRW_REGISTER_TYPE_D; + emit(AND(this->result, op[0], fs_reg(0x3f800000u))); + this->result.type = BRW_REGISTER_TYPE_F; break; case ir_unop_f2b: @@ -2394,39 +2387,36 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) break; case ir_binop_logic_xor: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(XOR(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + fs_reg temp = fs_reg(this, ir->type); + emit(XOR(temp, op[0], op[1])); + inst = emit(AND(reg_null_d, temp, fs_reg(1))); } else { inst = emit(XOR(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_or: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(OR(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + fs_reg temp = fs_reg(this, ir->type); + emit(OR(temp, op[0], op[1])); + inst = emit(AND(reg_null_d, temp, fs_reg(1))); } else { inst = emit(OR(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_and: - if (ctx->Const.UniformBooleanTrue == 1) { - fs_reg dst = fs_reg(this, glsl_type::uint_type); - emit(AND(dst, op[0], op[1])); - inst = emit(AND(reg_null_d, dst, fs_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + fs_reg temp = fs_reg(this, ir->type); + emit(AND(temp, op[0], op[1])); + inst = emit(AND(reg_null_d, temp, fs_reg(1))); } else { inst = emit(AND(reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_f2b: @@ -2455,7 +2445,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { + if (brw->gen <= 5) { resolve_bool_comparison(expr->operands[0], &op[0]); resolve_bool_comparison(expr->operands[1], &op[1]); } @@ -2545,7 +2535,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - if (ctx->Const.UniformBooleanTrue == 1) { + if (brw->gen <= 5) { resolve_bool_comparison(expr->operands[0], &op[0]); resolve_bool_comparison(expr->operands[1], &op[1]); } @@ -3412,17 +3402,25 @@ fs_visitor::resolve_ud_negate(fs_reg *reg) *reg = temp; } +/** + * Resolve the result of a Gen4-5 CMP instruction to a proper boolean. + * + * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined. + * If we need a proper boolean value, we have to fix it up to be 0 or ~0. + */ void fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) { - assert(ctx->Const.UniformBooleanTrue == 1); + assert(brw->gen <= 5); if (rvalue->type != glsl_type::bool_type) return; - fs_reg temp = fs_reg(this, glsl_type::bool_type); - emit(AND(temp, *reg, fs_reg(1))); - *reg = temp; + fs_reg and_result = fs_reg(this, glsl_type::bool_type); + fs_reg neg_result = fs_reg(this, glsl_type::bool_type); + emit(AND(and_result, *reg, fs_reg(1))); + emit(MOV(neg_result, negate(and_result))); + *reg = neg_result; } fs_visitor::fs_visitor(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d94c32368a5..527002723c8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -535,6 +535,7 @@ public: bool try_emit_mad(ir_expression *ir); bool try_emit_b2f_of_compare(ir_expression *ir); void resolve_ud_negate(src_reg *reg); + void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg); src_reg get_timestamp(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index a2c05eda14d..73fff751dfb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -819,18 +819,36 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, break; case ir_binop_logic_xor: - inst = emit(XOR(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + src_reg temp = src_reg(this, ir->type); + emit(XOR(dst_reg(temp), op[0], op[1])); + inst = emit(AND(dst_null_d(), temp, src_reg(1))); + } else { + inst = emit(XOR(dst_null_d(), op[0], op[1])); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_or: - inst = emit(OR(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + src_reg temp = src_reg(this, ir->type); + emit(OR(dst_reg(temp), op[0], op[1])); + inst = emit(AND(dst_null_d(), temp, src_reg(1))); + } else { + inst = emit(OR(dst_null_d(), op[0], op[1])); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_and: - inst = emit(AND(dst_null_d(), op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + if (brw->gen <= 5) { + src_reg temp = src_reg(this, ir->type); + emit(AND(dst_reg(temp), op[0], op[1])); + inst = emit(AND(dst_null_d(), temp, src_reg(1))); + } else { + inst = emit(AND(dst_null_d(), op[0], op[1])); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_f2b: @@ -852,16 +870,27 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, break; case ir_binop_all_equal: + if (brw->gen <= 5) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); *predicate = BRW_PREDICATE_ALIGN16_ALL4H; break; case ir_binop_any_nequal: + if (brw->gen <= 5) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); *predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; case ir_unop_any: + if (brw->gen <= 5) { + resolve_bool_comparison(expr->operands[0], &op[0]); + } inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); *predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; @@ -872,6 +901,10 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, case ir_binop_lequal: case ir_binop_equal: case ir_binop_nequal: + if (brw->gen <= 5) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } emit(CMP(dst_null_d(), op[0], op[1], brw_conditional_for_comparison(expr->operation))); break; @@ -902,14 +935,8 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, resolve_ud_negate(&this->result); - if (brw->gen >= 6) { - vec4_instruction *inst = emit(AND(dst_null_d(), - this->result, src_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - vec4_instruction *inst = emit(MOV(dst_null_d(), this->result)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } + vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1))); + inst->conditional_mod = BRW_CONDITIONAL_NZ; } /** @@ -1322,11 +1349,7 @@ vec4_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(NOT(result_dst, op[0])); - } else { - emit(XOR(result_dst, op[0], src_reg(1))); - } + emit(NOT(result_dst, op[0])); break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -1512,11 +1535,12 @@ vec4_visitor::visit(ir_expression *ir) case ir_binop_gequal: case ir_binop_equal: case ir_binop_nequal: { + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); + resolve_bool_comparison(ir->operands[1], &op[1]); + } emit(CMP(result_dst, op[0], op[1], brw_conditional_for_comparison(ir->operation))); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } break; } @@ -1530,9 +1554,6 @@ vec4_visitor::visit(ir_expression *ir) inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } } break; case ir_binop_any_nequal: @@ -1546,9 +1567,6 @@ vec4_visitor::visit(ir_expression *ir) inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } } break; @@ -1610,28 +1628,22 @@ vec4_visitor::visit(ir_expression *ir) emit(MOV(result_dst, op[0])); break; case ir_unop_b2i: - if (ctx->Const.UniformBooleanTrue != 1) { - emit(AND(result_dst, op[0], src_reg(1))); - } else { - emit(MOV(result_dst, op[0])); - } + emit(AND(result_dst, op[0], src_reg(1))); break; case ir_unop_b2f: - if (ctx->Const.UniformBooleanTrue != 1) { - op[0].type = BRW_REGISTER_TYPE_D; - result_dst.type = BRW_REGISTER_TYPE_D; - emit(AND(result_dst, op[0], src_reg(0x3f800000u))); - result_dst.type = BRW_REGISTER_TYPE_F; - } else { - emit(MOV(result_dst, op[0])); + if (brw->gen <= 5) { + resolve_bool_comparison(ir->operands[0], &op[0]); } + op[0].type = BRW_REGISTER_TYPE_D; + result_dst.type = BRW_REGISTER_TYPE_D; + emit(AND(result_dst, op[0], src_reg(0x3f800000u))); + result_dst.type = BRW_REGISTER_TYPE_F; break; case ir_unop_f2b: - case ir_unop_i2b: emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result_src, src_reg(1))); - } + break; + case ir_unop_i2b: + emit(AND(result_dst, op[0], src_reg(1))); break; case ir_unop_trunc: @@ -1777,9 +1789,6 @@ vec4_visitor::visit(ir_expression *ir) if (ir->type->base_type == GLSL_TYPE_BOOL) { emit(CMP(result_dst, packed_consts, src_reg(0u), BRW_CONDITIONAL_NZ)); - if (ctx->Const.UniformBooleanTrue == 1) { - emit(AND(result_dst, result, src_reg(1))); - } } else { emit(MOV(result_dst, packed_consts)); } @@ -3535,6 +3544,27 @@ vec4_visitor::resolve_ud_negate(src_reg *reg) *reg = temp; } +/** + * Resolve the result of a Gen4-5 CMP instruction to a proper boolean. + * + * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined. + * If we need a proper boolean value, we have to fix it up to be 0 or ~0. + */ +void +vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) +{ + assert(brw->gen <= 5); + + if (!rvalue->type->is_boolean()) + return; + + src_reg and_result = src_reg(this, rvalue->type); + src_reg neg_result = src_reg(this, rvalue->type); + emit(AND(dst_reg(and_result), *reg, src_reg(1))); + emit(MOV(dst_reg(neg_result), negate(and_result))); + *reg = neg_result; +} + vec4_visitor::vec4_visitor(struct brw_context *brw, struct brw_vec4_compile *c, struct gl_program *prog, -- 2.30.2