From f4ef34f207d15bcade7aed644328035dd0f2cc16 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 29 May 2019 17:46:55 -0500 Subject: [PATCH] intel/fs: Add an UNDEF instruction to avoid excess live ranges With 8 and 16-bit types and anything where we have to use non-trivial strides registersto deal with restrictions, we end up with things that look like partial writes even though we don't care about any values in the register except those written by that instruction. This is particularly important when dealing with loops because liveness sees is_partial_write and the fact that an old version from a previous loop iteration may be valid at that point and extends all purely partially written values to the entire loop. This commit adds a new UNDEF instruction which does nothing (the generator doesn't emit anything) but which does a fake write to the register. This informs liveness that we don't care about any values before that point so it won't consider those registers to be falsely live. We can safely emit UNDEF instructions for all SSA values that come in from NIR and nearly all temporaries generated by various stages of the compiler. In particular, we need to insert UNDEF instructions when we handle region restrictions because the newly allocated registers are almost guaranteed to be partially written. No shader-db changes. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110432 Reviewed-by: Matt Turner --- src/intel/compiler/brw_eu_defines.h | 8 ++++++++ src/intel/compiler/brw_fs_builder.h | 11 +++++++++++ src/intel/compiler/brw_fs_generator.cpp | 3 +++ src/intel/compiler/brw_fs_lower_regioning.cpp | 14 +++++++++----- src/intel/compiler/brw_fs_nir.cpp | 1 + src/intel/compiler/brw_shader.cpp | 3 +++ 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 66e6c53c2b7..933037c4df3 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -322,6 +322,14 @@ enum opcode { */ SHADER_OPCODE_SEND, + /** + * An "undefined" write which does nothing but indicates to liveness that + * we don't care about any values in the register which predate this + * instruction. Used to prevent partial writes from causing issues with + * live ranges. + */ + SHADER_OPCODE_UNDEF, + /** * Texture sampling opcodes. * diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index a69e3c6ae80..9655c2ef554 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -706,6 +706,17 @@ namespace brw { return inst; } + instruction * + UNDEF(const dst_reg &dst) const + { + assert(dst.file == VGRF); + instruction *inst = emit(SHADER_OPCODE_UNDEF, + retype(dst, BRW_REGISTER_TYPE_UD)); + inst->size_written = shader->alloc.sizes[dst.nr] * REG_SIZE; + + return inst; + } + backend_shader *shader; private: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index f91c857678a..7adc8a7938b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1653,6 +1653,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg); foreach_block_and_inst (block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_UNDEF) + continue; + struct brw_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index a76fd262a10..98699c38d84 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -288,7 +288,9 @@ namespace { const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); - const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride), stride); + fs_reg tmp = ibld.vgrf(type, stride); + ibld.UNDEF(tmp); + tmp = horiz_stride(tmp, stride); /* Emit a MOV taking care of all the destination modifiers. */ fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); @@ -329,8 +331,9 @@ namespace { const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / type_sz(inst->src[i].type); assert(stride > 0); - const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type, stride), - stride); + fs_reg tmp = ibld.vgrf(inst->src[i].type, stride); + ibld.UNDEF(tmp); + tmp = horiz_stride(tmp, stride); /* Emit a series of 32-bit integer copies with any source modifiers * cleaned up (because their semantics are dependent on the type). @@ -377,8 +380,9 @@ namespace { const unsigned stride = required_dst_byte_stride(inst) / type_sz(inst->dst.type); assert(stride > 0); - const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type, stride), - stride); + fs_reg tmp = ibld.vgrf(inst->dst.type, stride); + ibld.UNDEF(tmp); + tmp = horiz_stride(tmp, stride); /* Emit a series of 32-bit integer copies from the temporary into the * original destination. diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 77b131272ca..eef21294d07 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1951,6 +1951,7 @@ fs_visitor::get_nir_dest(const nir_dest &dest) BRW_REGISTER_TYPE_F); nir_ssa_values[dest.ssa.index] = bld.vgrf(reg_type, dest.ssa.num_components); + bld.UNDEF(nir_ssa_values[dest.ssa.index]); return nir_ssa_values[dest.ssa.index]; } else { /* We don't handle indirects on locals */ diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 1f98bd08224..2061afc1c24 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -217,6 +217,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_SEND: return "send"; + case SHADER_OPCODE_UNDEF: + return "undef"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: -- 2.30.2