From d2056ab9936ee67032e1a69bcbf2f1783d67d3c5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 13 Dec 2018 15:39:49 -0800 Subject: [PATCH] intel/vec4: Emit constants for some ALU sources as immediate values MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In some cases of flow control, the constant propagation is not able to determine that the source of an instruction must be a constant value. When we still have NIR SSA values, we can easily determine this. Emit the immediate value during code generation to possibly avoid spurious loads of constants into registers. I wrote this patch to prevent a couple trivial regressions in vec4 shaders caused by "nir/algebraic: Replace i2b used by bcsel or if-statement with comparison". The final result was quite a bit better than that... No shader-db changes on any Gen8+ platform. v2: Assert that we never get a negation source modifier on Gen8+. Suggested by Ken. This should never happen because we don't normally use vec4 for Gen8+ (requires an environment variable to force it), and there's no code to generate these negations. Still, erring on the side of caution is better. Haswell total instructions in shared programs: 13776218 -> 13764783 (-0.08%) instructions in affected programs: 663931 -> 652496 (-1.72%) helped: 3495 HURT: 1 helped stats (abs) min: 1 max: 30 x̄: 3.28 x̃: 2 helped stats (rel) min: 0.21% max: 10.00% x̄: 1.79% x̃: 1.49% HURT stats (abs) min: 24 max: 24 x̄: 24.00 x̃: 24 HURT stats (rel) min: 12.24% max: 12.24% x̄: 12.24% x̃: 12.24% 95% mean confidence interval for instructions value: -3.39 -3.15 95% mean confidence interval for instructions %-change: -1.84% -1.75% Instructions are helped. 
total cycles in shared programs: 386818984 -> 386511910 (-0.08%) cycles in affected programs: 20379636 -> 20072562 (-1.51%) helped: 3052 HURT: 476 helped stats (abs) min: 2 max: 12516 x̄: 110.40 x̃: 6 helped stats (rel) min: 0.05% max: 24.68% x̄: 1.58% x̃: 0.69% HURT stats (abs) min: 2 max: 416 x̄: 62.76 x̃: 24 HURT stats (rel) min: 0.10% max: 10.75% x̄: 4.03% x̃: 2.18% 95% mean confidence interval for cycles value: -115.57 -58.51 95% mean confidence interval for cycles %-change: -0.93% -0.73% Cycles are helped. total spills in shared programs: 100482 -> 100480 (<.01%) spills in affected programs: 79 -> 77 (-2.53%) helped: 3 HURT: 1 total fills in shared programs: 96883 -> 96877 (<.01%) fills in affected programs: 85 -> 79 (-7.06%) helped: 4 HURT: 0 Ivy Bridge total instructions in shared programs: 12000562 -> 11990113 (-0.09%) instructions in affected programs: 572581 -> 562132 (-1.82%) helped: 3106 HURT: 0 helped stats (abs) min: 1 max: 30 x̄: 3.36 x̃: 2 helped stats (rel) min: 0.21% max: 10.00% x̄: 1.86% x̃: 1.49% 95% mean confidence interval for instructions value: -3.49 -3.23 95% mean confidence interval for instructions %-change: -1.91% -1.81% Instructions are helped. total cycles in shared programs: 180958504 -> 180664500 (-0.16%) cycles in affected programs: 19991810 -> 19697806 (-1.47%) helped: 2654 HURT: 486 helped stats (abs) min: 2 max: 12516 x̄: 121.61 x̃: 6 helped stats (rel) min: 0.05% max: 20.66% x̄: 1.48% x̃: 0.68% HURT stats (abs) min: 2 max: 396 x̄: 59.18 x̃: 24 HURT stats (rel) min: 0.05% max: 9.62% x̄: 3.82% x̃: 2.16% 95% mean confidence interval for cycles value: -125.62 -61.64 95% mean confidence interval for cycles %-change: -0.76% -0.56% Cycles are helped. 
Sandy Bridge total instructions in shared programs: 10842336 -> 10835438 (-0.06%) instructions in affected programs: 395340 -> 388442 (-1.74%) helped: 1926 HURT: 0 helped stats (abs) min: 1 max: 22 x̄: 3.58 x̃: 2 helped stats (rel) min: 0.10% max: 9.68% x̄: 1.78% x̃: 1.42% 95% mean confidence interval for instructions value: -3.73 -3.43 95% mean confidence interval for instructions %-change: -1.84% -1.72% Instructions are helped. total cycles in shared programs: 154590074 -> 154569050 (-0.01%) cycles in affected programs: 8159932 -> 8138908 (-0.26%) helped: 1670 HURT: 228 helped stats (abs) min: 2 max: 260 x̄: 18.13 x̃: 6 helped stats (rel) min: 0.02% max: 8.70% x̄: 0.74% x̃: 0.28% HURT stats (abs) min: 2 max: 1798 x̄: 40.58 x̃: 14 HURT stats (rel) min: 0.03% max: 12.97% x̄: 1.04% x̃: 0.31% 95% mean confidence interval for cycles value: -13.51 -8.64 95% mean confidence interval for cycles %-change: -0.60% -0.46% Cycles are helped. Iron Lake and GM45 had similar results. (Iron Lake shown) total instructions in shared programs: 8212357 -> 8206587 (-0.07%) instructions in affected programs: 323664 -> 317894 (-1.78%) helped: 1457 HURT: 0 helped stats (abs) min: 1 max: 12 x̄: 3.96 x̃: 3 helped stats (rel) min: 0.33% max: 11.49% x̄: 1.86% x̃: 1.44% 95% mean confidence interval for instructions value: -4.14 -3.78 95% mean confidence interval for instructions %-change: -1.93% -1.78% Instructions are helped. total cycles in shared programs: 187668016 -> 187657422 (<.01%) cycles in affected programs: 14856234 -> 14845640 (-0.07%) helped: 1372 HURT: 83 helped stats (abs) min: 2 max: 24 x̄: 7.92 x̃: 6 helped stats (rel) min: 0.02% max: 1.14% x̄: 0.12% x̃: 0.08% HURT stats (abs) min: 2 max: 14 x̄: 3.20 x̃: 2 HURT stats (rel) min: 0.03% max: 0.60% x̄: 0.12% x̃: 0.12% 95% mean confidence interval for cycles value: -7.65 -6.91 95% mean confidence interval for cycles %-change: -0.11% -0.10% Cycles are helped. 
Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_vec4_nir.cpp | 71 +++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index d84b0f6b235..882e97a1bf1 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1008,6 +1008,66 @@ vec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src, inst->saturate = saturate; } +/** + * Try to use an immediate value for source 1 + * + * In cases of flow control, constant propagation is sometimes unable to + * determine that a register contains a constant value. To work around this, + * try to emit a literal as the second source here. + */ +static void +try_immediate_source(const nir_alu_instr *instr, src_reg *op, + MAYBE_UNUSED const gen_device_info *devinfo) +{ + if (nir_src_num_components(instr->src[1].src) != 1 || + nir_src_bit_size(instr->src[1].src) != 32 || + !nir_src_is_const(instr->src[1].src)) + return; + + const enum brw_reg_type old_type = op->type; + + switch (old_type) { + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UD: { + int d = nir_src_as_int(instr->src[1].src); + + if (op->abs) + d = MAX2(-d, d); + + if (op->negate) { + /* On Gen8+ a negation source modifier on a logical operation means + * something different. Nothing should generate this, so assert that + * it does not occur. 
+ */ + assert(devinfo->gen < 8 || (instr->op != nir_op_iand && + instr->op != nir_op_ior && + instr->op != nir_op_ixor)); + d = -d; + } + + *op = retype(src_reg(brw_imm_d(d)), old_type); + break; + } + + case BRW_REGISTER_TYPE_F: { + float f = nir_src_as_float(instr->src[1].src); + + if (op->abs) + f = fabs(f); + + if (op->negate) + f = -f; + + *op = src_reg(brw_imm_f(f)); + assert(op->type == old_type); + break; + } + + default: + unreachable("Non-32bit type."); + } +} + void vec4_visitor::nir_emit_alu(nir_alu_instr *instr) { @@ -1066,6 +1126,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fadd: + try_immediate_source(instr, &op[1], devinfo); inst = emit(ADD(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; break; @@ -1077,6 +1138,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fmul: + try_immediate_source(instr, &op[1], devinfo); inst = emit(MUL(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; break; @@ -1304,6 +1366,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmin: + try_immediate_source(instr, &op[1], devinfo); inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; @@ -1313,6 +1376,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) assert(nir_dest_bit_size(instr->dest.dest) < 64); /* fall through */ case nir_op_fmax: + try_immediate_source(instr, &op[1], devinfo); inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; @@ -1341,6 +1405,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) brw_conditional_for_nir_comparison(instr->op); if (nir_src_bit_size(instr->src[0].src) < 64) { + try_immediate_source(instr, &op[1], devinfo); emit(CMP(dst, op[0], op[1], conditional_mod)); } else { /* Produce a 32-bit boolean result from the DF comparison 
by selecting @@ -1410,6 +1475,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, &op[1], devinfo); emit(XOR(dst, op[0], op[1])); break; @@ -1419,6 +1485,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, &op[1], devinfo); emit(OR(dst, op[0], op[1])); break; @@ -1428,6 +1495,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } + try_immediate_source(instr, &op[1], devinfo); emit(AND(dst, op[0], op[1])); break; @@ -1737,16 +1805,19 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ishl: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, &op[1], devinfo); emit(SHL(dst, op[0], op[1])); break; case nir_op_ishr: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, &op[1], devinfo); emit(ASR(dst, op[0], op[1])); break; case nir_op_ushr: assert(nir_dest_bit_size(instr->dest.dest) < 64); + try_immediate_source(instr, &op[1], devinfo); emit(SHR(dst, op[0], op[1])); break; -- 2.30.2