From cb3e21cd1925c9378b4acb869601bbb011d0de97 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 9 Feb 2017 15:21:47 +0000 Subject: [PATCH] intel/fs: Use De Morgan's laws to avoid logical-not of a logic result on Gen8+ MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Instead of emitting ~(a & b), emit (~a | ~b) since logical-not of operands is free on Gen8+. v2: Fix swizzles. Fix types for cmod propagation. v3: Simplify logic for inverting source of inot(ixor(a, b)). Suggested by Ken. Skylake and Broadwell had similar results. (Skylake shown) Skylake total instructions in shared programs: 15185593 -> 15185583 (<.01%) instructions in affected programs: 5673 -> 5663 (-0.18%) helped: 12 HURT: 1 helped stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1 helped stats (rel) min: 0.30% max: 5.88% x̄: 1.50% x̃: 0.70% HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4 HURT stats (rel) min: 0.12% max: 0.12% x̄: 0.12% x̃: 0.12% 95% mean confidence interval for instructions value: -1.66 0.13 95% mean confidence interval for instructions %-change: -2.60% -0.15% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 370977726 -> 370964249 (<.01%) cycles in affected programs: 869987 -> 856510 (-1.55%) helped: 15 HURT: 2 helped stats (abs) min: 2 max: 6640 x̄: 902.20 x̃: 16 helped stats (rel) min: <.01% max: 4.92% x̄: 1.71% x̃: 1.53% HURT stats (abs) min: 14 max: 42 x̄: 28.00 x̃: 28 HURT stats (rel) min: 1.08% max: 3.18% x̄: 2.13% x̃: 2.13% 95% mean confidence interval for cycles value: -1654.87 69.34 95% mean confidence interval for cycles %-change: -2.29% -0.23% Inconclusive result (value mean confidence interval includes 0). Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs_nir.cpp | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 23b21f1d680..110473bfa85 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1155,6 +1155,65 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_inot: if (devinfo->gen >= 8) { + nir_alu_instr *const inot_src_instr = + nir_src_as_alu_instr(&instr->src[0].src); + + if (inot_src_instr != NULL && + (inot_src_instr->op == nir_op_ior || + inot_src_instr->op == nir_op_ixor || + inot_src_instr->op == nir_op_iand) && + !inot_src_instr->src[0].abs && + !inot_src_instr->src[0].negate && + !inot_src_instr->src[1].abs && + !inot_src_instr->src[1].negate) { + /* The sources of the source logical instruction are now the + * sources of the instruction that will be generated. + */ + prepare_alu_destination_and_sources(bld, inot_src_instr, op, false); + resolve_inot_sources(bld, inot_src_instr, op); + + /* Smash all of the sources and destination to be signed. This + * doesn't matter for the operation of the instruction, but cmod + * propagation fails on unsigned sources with negation (due to + * fs_inst::can_do_cmod returning false). + */ + result.type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_dest_bit_size(instr->dest.dest))); + op[0].type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_src_bit_size(inot_src_instr->src[0].src))); + op[1].type = + brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_type_int | + nir_src_bit_size(inot_src_instr->src[1].src))); + + /* For XOR, only invert one of the sources. Arbitrarily choose + * the first source. + */ + op[0].negate = !op[0].negate; + if (inot_src_instr->op != nir_op_ixor) + op[1].negate = !op[1].negate; + + switch (inot_src_instr->op) { + case nir_op_ior: + bld.AND(result, op[0], op[1]); + return; + + case nir_op_iand: + bld.OR(result, op[0], op[1]); + return; + + case nir_op_ixor: + bld.XOR(result, op[0], op[1]); + return; + + default: + unreachable("impossible opcode"); + } + } op[0] = resolve_source_modifiers(op[0]); } bld.NOT(result, op[0]); -- 2.30.2