From acfa2340e60f83f927c8f4426773580d34239c18 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 6 Sep 2019 19:34:42 -0500 Subject: [PATCH] intel/fs: Handle UNDEF in split_virtual_grfs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When the UNDEF instruction was added, we didn't do anything special in split_virtual_grfs. This mean that anything with an UNDEF wasn't getting split which causes problems for the compiler. Among other things, it makes RA harder because things are in bigger chunks. It also meant that dvec4s weren't getting split which means that they are larger than the maximum register size. Shader-db results on Kaby Lake: total instructions in shared programs: 14959202 -> 14960035 (<.01%) instructions in affected programs: 96197 -> 97030 (0.87%) helped: 140 HURT: 128 helped stats (abs) min: 1 max: 17 x̄: 1.62 x̃: 1 helped stats (rel) min: 0.09% max: 6.15% x̄: 0.65% x̃: 0.45% HURT stats (abs) min: 1 max: 825 x̄: 8.28 x̃: 1 HURT stats (rel) min: 0.13% max: 139.83% x̄: 1.70% x̃: 0.50% 95% mean confidence interval for instructions value: -2.96 9.18 95% mean confidence interval for instructions %-change: -0.56% 1.51% Inconclusive result (value mean confidence interval includes 0). total loops in shared programs: 4372 -> 4372 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cycles in shared programs: 352646771 -> 352840997 (0.06%) cycles in affected programs: 218600800 -> 218795026 (0.09%) helped: 21167 HURT: 21411 helped stats (abs) min: 1 max: 2924 x̄: 36.89 x̃: 10 helped stats (rel) min: <.01% max: 41.90% x̄: 2.97% x̃: 0.98% HURT stats (abs) min: 1 max: 26027 x̄: 45.54 x̃: 10 HURT stats (rel) min: <.01% max: 324.46% x̄: 3.88% x̃: 1.06% 95% mean confidence interval for cycles value: 2.87 6.26 95% mean confidence interval for cycles %-change: 0.40% 0.55% Cycles are HURT. total spills in shared programs: 8840 -> 8953 (1.28%) spills in affected programs: 126 -> 239 (89.68%) helped: 1 HURT: 2 total fills in shared programs: 21782 -> 21914 (0.61%) fills in affected programs: 431 -> 563 (30.63%) helped: 1 HURT: 3 LOST: 0 GAINED: 5 Shader-db results on Haswell: total instructions in shared programs: 13320918 -> 13320769 (<.01%) instructions in affected programs: 40998 -> 40849 (-0.36%) helped: 146 HURT: 56 helped stats (abs) min: 1 max: 8 x̄: 2.73 x̃: 2 helped stats (rel) min: 0.16% max: 8.60% x̄: 2.52% x̃: 2.22% HURT stats (abs) min: 2 max: 23 x̄: 4.45 x̃: 4 HURT stats (rel) min: 0.21% max: 10.26% x̄: 6.83% x̃: 10.26% 95% mean confidence interval for instructions value: -1.26 -0.21 95% mean confidence interval for instructions %-change: -0.62% 0.77% Inconclusive result (%-change mean confidence interval includes 0). total loops in shared programs: 4373 -> 4373 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cycles in shared programs: 374518258 -> 374384193 (-0.04%) cycles in affected programs: 231101954 -> 230967889 (-0.06%) helped: 21427 HURT: 19438 helped stats (abs) min: 1 max: 2035 x̄: 31.09 x̃: 8 helped stats (rel) min: <.01% max: 40.95% x̄: 2.42% x̃: 0.86% HURT stats (abs) min: 1 max: 20875 x̄: 27.38 x̃: 8 HURT stats (rel) min: <.01% max: 59.09% x̄: 2.49% x̃: 0.80% 95% mean confidence interval for cycles value: -4.49 -2.07 95% mean confidence interval for cycles %-change: -0.14% -0.04% Cycles are helped. total spills in shared programs: 23406 -> 23411 (0.02%) spills in affected programs: 3 -> 8 (166.67%) helped: 0 HURT: 2 total fills in shared programs: 34845 -> 34850 (0.01%) fills in affected programs: 3 -> 8 (166.67%) helped: 0 HURT: 2 LOST: 0 GAINED: 0 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111566 Fixes: f4ef34f207d1 "intel/fs: Add an UNDEF instruction to avoid..." Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_fs.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index f614fa85884..4d54dd0e488 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1910,6 +1910,17 @@ fs_visitor::split_virtual_grfs() } foreach_block_and_inst(block, fs_inst, inst, cfg) { + /* We fix up undef instructions later */ + if (inst->opcode == SHADER_OPCODE_UNDEF) { + /* UNDEF instructions are currently only used to undef entire + * registers. We need this invariant later when we split them. + */ + assert(inst->dst.file == VGRF); + assert(inst->dst.offset == 0); + assert(inst->size_written == alloc.sizes[inst->dst.nr] * REG_SIZE); + continue; + } + if (inst->dst.file == VGRF) { int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; for (unsigned j = 1; j < regs_written(inst); j++) @@ -1962,7 +1973,20 @@ fs_visitor::split_virtual_grfs() } assert(reg == reg_count); - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_UNDEF) { + const fs_builder ibld(this, block, inst); + assert(inst->size_written % REG_SIZE == 0); + unsigned reg_offset = 0; + while (reg_offset < inst->size_written / REG_SIZE) { + reg = vgrf_to_reg[inst->dst.nr] + reg_offset; + ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type)); + reg_offset += alloc.sizes[new_virtual_grf[reg]]; + } + inst->remove(block); + continue; + } + if (inst->dst.file == VGRF) { reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; inst->dst.nr = new_virtual_grf[reg]; -- 2.30.2