From 2e51dc838be177a09f60958da7d1d904f1038d9c Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 8 Aug 2014 21:00:31 -0700 Subject: [PATCH] i965: Use ~0 to represent true on Gen >= 6. total instructions in shared programs: 4292303 -> 4288650 (-0.09%) instructions in affected programs: 299670 -> 296017 (-1.22%) Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_context.c | 27 +++++++- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 42 +++++++++---- .../drivers/dri/i965/brw_vec4_visitor.cpp | 61 +++++++++++++------ 4 files changed, 102 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 12f898abdb5..216b788a7ee 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -483,7 +483,32 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.QuadsFollowProvokingVertexConvention = false; ctx->Const.NativeIntegers = true; - ctx->Const.UniformBooleanTrue = 1; + + /* Regarding the CMP instruction, the Ivybridge PRM says: + * + * "For each enabled channel 0b or 1b is assigned to the appropriate flag + * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord + * 0xFFFFFFFF) is assigned to dst." + * + * but PRMs for earlier generations say + * + * "In dword format, one GRF may store up to 8 results. When the register + * is used later as a vector of Booleans, as only LSB at each channel + * contains meaning [sic] data, software should make sure all higher bits + * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." + * + * We select the representation of a true boolean uniform to match what the + * CMP instruction returns. + * + * The Sandybridge BSpec's description of the CMP instruction matches that + * of the Ivybridge PRM. (The description in the Sandybridge PRM seems + * to have not been updated from Ironlake). 
Its CMP instruction behaves like + * Ivybridge and newer. + */ + if (brw->gen >= 6) + ctx->Const.UniformBooleanTrue = ~0; + else + ctx->Const.UniformBooleanTrue = 1; /* From the gen4 PRM, volume 4 page 127: * diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f1d3fb8ec67..935cbfa0973 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1190,11 +1190,11 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) /* The frontfacing comes in as a bit in the thread payload. */ if (brw->gen >= 6) { - emit(BRW_OPCODE_ASR, *reg, + emit(BRW_OPCODE_SHL, *reg, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(15)); + fs_reg(16)); emit(BRW_OPCODE_NOT, *reg, *reg); - emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1)); + emit(BRW_OPCODE_ASR, *reg, *reg, fs_reg(31)); } else { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); /* bit 31 is "primitive is back face", so checking < (1 << 31) gives diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index fa2c2269356..f2c22bbc4cf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -526,10 +526,11 @@ fs_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - /* Note that BRW_OPCODE_NOT is not appropriate here, since it is - * ones complement of the whole register, not just bit 0. 
- */ - emit(XOR(this->result, op[0], fs_reg(1))); + if (ctx->Const.UniformBooleanTrue != 1) { + emit(NOT(this->result, op[0])); + } else { + emit(XOR(this->result, op[0], fs_reg(1))); + } break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -697,8 +698,10 @@ fs_visitor::visit(ir_expression *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(ir->operands[0], &op[0]); + resolve_bool_comparison(ir->operands[1], &op[1]); + } emit(CMP(this->result, op[0], op[1], brw_conditional_for_comparison(ir->operation))); @@ -769,9 +772,16 @@ fs_visitor::visit(ir_expression *ir) emit(AND(this->result, op[0], fs_reg(1))); break; case ir_unop_b2f: - temp = fs_reg(this, glsl_type::int_type); - emit(AND(temp, op[0], fs_reg(1))); - emit(MOV(this->result, temp)); + if (ctx->Const.UniformBooleanTrue != 1) { + op[0].type = BRW_REGISTER_TYPE_UD; + this->result.type = BRW_REGISTER_TYPE_UD; + emit(AND(this->result, op[0], fs_reg(0x3f800000u))); + this->result.type = BRW_REGISTER_TYPE_F; + } else { + temp = fs_reg(this, glsl_type::int_type); + emit(AND(temp, op[0], fs_reg(1))); + emit(MOV(this->result, temp)); + } break; case ir_unop_f2b: @@ -2317,8 +2327,10 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } emit(CMP(reg_null_d, op[0], op[1], brw_conditional_for_comparison(expr->operation))); @@ -2379,8 +2391,10 @@ fs_visitor::emit_if_gen6(ir_if *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - 
resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); + if (ctx->Const.UniformBooleanTrue == 1) { + resolve_bool_comparison(expr->operands[0], &op[0]); + resolve_bool_comparison(expr->operands[1], &op[1]); + } emit(IF(op[0], op[1], brw_conditional_for_comparison(expr->operation))); @@ -3220,6 +3234,8 @@ fs_visitor::resolve_ud_negate(fs_reg *reg) void fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) { + assert(ctx->Const.UniformBooleanTrue == 1); + if (rvalue->type != glsl_type::bool_type) return; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index cb9a1b1b68f..eca5d0cb01c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1281,10 +1281,11 @@ vec4_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - /* Note that BRW_OPCODE_NOT is not appropriate here, since it is - * ones complement of the whole register, not just bit 0. 
- */ - emit(XOR(result_dst, op[0], src_reg(1))); + if (ctx->Const.UniformBooleanTrue != 1) { + emit(NOT(result_dst, op[0])); + } else { + emit(XOR(result_dst, op[0], src_reg(1))); + } break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -1468,7 +1469,9 @@ vec4_visitor::visit(ir_expression *ir) case ir_binop_nequal: { emit(CMP(result_dst, op[0], op[1], brw_conditional_for_comparison(ir->operation))); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } break; } @@ -1478,11 +1481,13 @@ vec4_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } } break; case ir_binop_any_nequal: @@ -1492,11 +1497,13 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result_src, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } } break; @@ -1504,7 +1511,7 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(1))); + inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue))); 
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; @@ -1553,18 +1560,34 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_i2u: case ir_unop_u2i: case ir_unop_u2f: - case ir_unop_b2f: - case ir_unop_b2i: case ir_unop_f2i: case ir_unop_f2u: emit(MOV(result_dst, op[0])); break; + case ir_unop_b2i: + if (ctx->Const.UniformBooleanTrue != 1) { + emit(AND(result_dst, op[0], src_reg(1))); + } else { + emit(MOV(result_dst, op[0])); + } + break; + case ir_unop_b2f: + if (ctx->Const.UniformBooleanTrue != 1) { + op[0].type = BRW_REGISTER_TYPE_UD; + result_dst.type = BRW_REGISTER_TYPE_UD; + emit(AND(result_dst, op[0], src_reg(0x3f800000u))); + result_dst.type = BRW_REGISTER_TYPE_F; + } else { + emit(MOV(result_dst, op[0])); + } + break; case ir_unop_f2b: - case ir_unop_i2b: { + case ir_unop_i2b: emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result_src, src_reg(1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result_src, src_reg(1))); + } break; - } case ir_unop_trunc: emit(RNDZ(result_dst, op[0])); @@ -1703,11 +1726,15 @@ vec4_visitor::visit(ir_expression *ir) const_offset % 16 / 4, const_offset % 16 / 4); - /* UBO bools are any nonzero int. We store bools as either 0 or 1. */ + /* UBO bools are any nonzero int. We need to convert them to use the + * value of true stored in ctx->Const.UniformBooleanTrue. + */ if (ir->type->base_type == GLSL_TYPE_BOOL) { emit(CMP(result_dst, packed_consts, src_reg(0u), BRW_CONDITIONAL_NZ)); - emit(AND(result_dst, result, src_reg(0x1))); + if (ctx->Const.UniformBooleanTrue == 1) { + emit(AND(result_dst, result, src_reg(1))); + } } else { emit(MOV(result_dst, packed_consts)); } -- 2.30.2