From acd7796a079bd4ac0cadb2e6bb73033e79310aaf Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 11 Jun 2019 11:08:49 -0700 Subject: [PATCH] intel/vec4: Try to emit a VF source in try_immediate_source MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit is also a pre-requisite for the next commit. No shader-db changes on any Gen8+ platform as these platforms do not use the vec4 backend. v2: Massive rebase on eeebeb211f1 ("intel/vec4: Try emitting non-scalar immediates"). This change is a lot less helpful since that commit landed (previously helped 1934 shaders on HSW) because, apparently, a lot of the cases helped by that commit were things like vector loads of { 1.0, 1.0, 1.0 } that were also helped by this commit. Haswell total instructions in shared programs: 13480095 -> 13478598 (-0.01%) instructions in affected programs: 229534 -> 228037 (-0.65%) helped: 1006 HURT: 0 helped stats (abs) min: 1 max: 7 x̄: 1.49 x̃: 1 helped stats (rel) min: 0.04% max: 3.45% x̄: 1.11% x̃: 1.09% 95% mean confidence interval for instructions value: -1.54 -1.43 95% mean confidence interval for instructions %-change: -1.15% -1.07% Instructions are helped. total cycles in shared programs: 376385734 -> 376386916 (<.01%) cycles in affected programs: 14101380 -> 14102562 (<.01%) helped: 941 HURT: 56 helped stats (abs) min: 2 max: 322 x̄: 5.62 x̃: 2 helped stats (rel) min: <.01% max: 7.74% x̄: 0.51% x̃: 0.42% HURT stats (abs) min: 2 max: 618 x̄: 115.50 x̃: 32 HURT stats (rel) min: 0.03% max: 4.62% x̄: 0.83% x̃: 0.44% 95% mean confidence interval for cycles value: -2.06 4.43 95% mean confidence interval for cycles %-change: -0.47% -0.39% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 12048004 -> 12046589 (-0.01%) instructions in affected programs: 217072 -> 215657 (-0.65%) helped: 934 HURT: 0 helped stats (abs) min: 1 max: 7 x̄: 1.51 x̃: 1 helped stats (rel) min: 0.04% max: 3.45% x̄: 1.14% x̃: 1.11% 95% mean confidence interval for instructions value: -1.57 -1.46 95% mean confidence interval for instructions %-change: -1.18% -1.10% Instructions are helped. total cycles in shared programs: 180285854 -> 180287608 (<.01%) cycles in affected programs: 14103824 -> 14105578 (0.01%) helped: 871 HURT: 53 helped stats (abs) min: 2 max: 322 x̄: 5.51 x̃: 2 helped stats (rel) min: <.01% max: 7.67% x̄: 0.50% x̃: 0.42% HURT stats (abs) min: 2 max: 618 x̄: 123.66 x̃: 32 HURT stats (rel) min: 0.03% max: 4.47% x̄: 0.92% x̃: 0.46% 95% mean confidence interval for cycles value: -1.60 5.39 95% mean confidence interval for cycles %-change: -0.46% -0.37% Inconclusive result (value mean confidence interval includes 0). Sandy Bridge total instructions in shared programs: 10861227 -> 10860328 (<.01%) instructions in affected programs: 92969 -> 92070 (-0.97%) helped: 624 HURT: 0 helped stats (abs) min: 1 max: 7 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.11% max: 3.45% x̄: 1.05% x̃: 0.95% 95% mean confidence interval for instructions value: -1.52 -1.36 95% mean confidence interval for instructions %-change: -1.09% -1.01% Instructions are helped. total cycles in shared programs: 153944316 -> 153942720 (<.01%) cycles in affected programs: 1640956 -> 1639360 (-0.10%) helped: 601 HURT: 15 helped stats (abs) min: 2 max: 120 x̄: 3.56 x̃: 2 helped stats (rel) min: 0.02% max: 6.33% x̄: 0.18% x̃: 0.08% HURT stats (abs) min: 2 max: 72 x̄: 36.13 x̃: 36 HURT stats (rel) min: 0.05% max: 3.84% x̄: 1.95% x̃: 2.00% 95% mean confidence interval for cycles value: -3.44 -1.74 95% mean confidence interval for cycles %-change: -0.18% -0.09% Cycles are helped. Iron Lake and GM45 had similar results. (Iron Lake shown) total instructions in shared programs: 8139924 -> 8139378 (<.01%) instructions in affected programs: 69776 -> 69230 (-0.78%) helped: 322 HURT: 0 helped stats (abs) min: 1 max: 8 x̄: 1.70 x̃: 1 helped stats (rel) min: 0.27% max: 3.23% x̄: 0.79% x̃: 0.54% 95% mean confidence interval for instructions value: -1.88 -1.51 95% mean confidence interval for instructions %-change: -0.85% -0.72% Instructions are helped. total cycles in shared programs: 188542864 -> 188541756 (<.01%) cycles in affected programs: 3031532 -> 3030424 (-0.04%) helped: 320 HURT: 0 helped stats (abs) min: 2 max: 20 x̄: 3.46 x̃: 2 helped stats (rel) min: <.01% max: 0.69% x̄: 0.06% x̃: 0.06% 95% mean confidence interval for cycles value: -3.85 -3.07 95% mean confidence interval for cycles %-change: -0.06% -0.05% Cycles are helped. Reviewed-by: Matt Turner --- src/intel/compiler/brw_vec4_nir.cpp | 45 +++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 7d0394bdc67..157a97bfc03 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1088,29 +1088,50 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op, case BRW_REGISTER_TYPE_F: { int first_comp = -1; - float f; + float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + bool is_scalar = true; for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { if (nir_alu_instr_channel_used(instr, idx, i)) { + f[i] = nir_src_comp_as_float(instr->src[idx].src, + instr->src[idx].swizzle[i]); if (first_comp < 0) { first_comp = i; - f = nir_src_comp_as_float(instr->src[idx].src, - instr->src[idx].swizzle[i]); - } else if (f != nir_src_comp_as_float(instr->src[idx].src, - instr->src[idx].swizzle[i])) { - return -1; + } else if (f[first_comp] != f[i]) { + is_scalar = false; } } } - if (op[idx].abs) - f = fabs(f); + if (is_scalar) { + if (op[idx].abs) + f[first_comp] = fabs(f[first_comp]); + + if (op[idx].negate) + f[first_comp] = -f[first_comp]; + + op[idx] = src_reg(brw_imm_f(f[first_comp])); + assert(op[idx].type == old_type); + } else { + uint8_t vf_values[4] = { 0, 0, 0, 0 }; - if (op[idx].negate) - f = -f; + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (op[idx].abs) + f[i] = fabs(f[i]); - op[idx] = src_reg(brw_imm_f(f)); - assert(op[idx].type == old_type); + if (op[idx].negate) + f[i] = -f[i]; + + const int vf = brw_float_to_vf(f[i]); + if (vf == -1) + return -1; + + vf_values[i] = vf; + } + + op[idx] = src_reg(brw_imm_vf4(vf_values[0], vf_values[1], + vf_values[2], vf_values[3])); + } break; } -- 2.30.2