From ab0d1b3b3d17bab2444674aa06f0a8458f6f0821 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 31 Dec 2019 00:10:28 -0800 Subject: [PATCH] intel/fs: Rework fs_inst::is_copy_payload() into multiple classification helpers. This reworks the current fs_inst::is_copy_payload() method into a number of classification helpers with well-defined semantics. This will be useful later on in order to optimize LOAD_PAYLOAD instructions more aggressively in cases where we can determine it's safe to do so. The closest equivalent of the present fs_inst::is_copy_payload() method is the is_coalescing_payload() helper introduced here. No functional nor shader-db changes. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.cpp | 28 ----- src/intel/compiler/brw_fs_cse.cpp | 2 +- .../compiler/brw_fs_register_coalesce.cpp | 2 +- src/intel/compiler/brw_ir_fs.h | 100 +++++++++++++++++- 4 files changed, 101 insertions(+), 31 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index ef2cd177d66..c801216fc19 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -427,34 +427,6 @@ fs_inst::has_source_and_destination_hazard() const } } -bool -fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const -{ - if (this->opcode != SHADER_OPCODE_LOAD_PAYLOAD) - return false; - - fs_reg reg = this->src[0]; - if (reg.file != VGRF || reg.offset != 0 || reg.stride != 1) - return false; - - if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written) - return false; - - for (int i = 0; i < this->sources; i++) { - reg.type = this->src[i].type; - if (!this->src[i].equals(reg)) - return false; - - if (i < this->header_size) { - reg.offset += REG_SIZE; - } else { - reg = horiz_offset(reg, this->exec_size); - } - } - - return true; -} - bool fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const { diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 
f348f915e78..b7c32f3907f 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -105,7 +105,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_COS: return inst->mlen < 2; case SHADER_OPCODE_LOAD_PAYLOAD: - return !inst->is_copy_payload(v->alloc); + return !is_coalescing_payload(v->alloc, inst); default: return inst->is_send_from_grf() && !inst->has_side_effects() && !inst->is_volatile(); diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index 4fe6773da54..8127b29369c 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -86,7 +86,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) return false; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { - if (!inst->is_copy_payload(v->alloc)) { + if (!is_coalescing_payload(v->alloc, inst)) { return false; } } diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 24e523c5270..973c9fb168d 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -350,7 +350,6 @@ public: bool is_send_from_grf() const; bool is_payload(unsigned arg) const; bool is_partial_write() const; - bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; unsigned size_read(int arg) const; bool can_do_source_mods(const struct gen_device_info *devinfo) const; @@ -570,4 +569,103 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo, return false; } +/** + * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from + * the specified register file into a VGRF: not a partial write, no saturate, + * and every source is contiguous, unmodified (no abs/negate) and in that file.
+ * + * This implies identity register regions without any source-destination + * overlap, but says nothing else about where sources and destination live: + * gathering portions of multiple virtual registers in any order is allowed. + */ +inline bool +is_copy_payload(brw_reg_file file, const fs_inst *inst) +{ + if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD || + inst->is_partial_write() || inst->saturate || + inst->dst.file != VGRF) + return false; + + for (unsigned i = 0; i < inst->sources; i++) { + if (inst->src[i].file != file || + inst->src[i].abs || inst->src[i].negate) + return false; + + if (!inst->src[i].is_contiguous()) + return false; + + if (regions_overlap(inst->dst, inst->size_written, + inst->src[i], inst->size_read(i))) + return false; + } + + return true; +} + +/** + * Like is_copy_payload(), but the instruction is required to copy a single + * contiguous block of registers from the given register file without any + * reordering: src[i+1] must equal byte_offset(src[i], size_read(i)), type aside. + */ +inline bool +is_identity_payload(brw_reg_file file, const fs_inst *inst) { + if (is_copy_payload(file, inst)) { + fs_reg reg = inst->src[0]; + + for (unsigned i = 0; i < inst->sources; i++) { + reg.type = inst->src[i].type; + if (!inst->src[i].equals(reg)) + return false; + + reg = byte_offset(reg, inst->size_read(i)); + } + + return true; + } else { + return false; + } +} + +/** + * Like is_copy_payload(), but the instruction is required to source data from + * at least two disjoint VGRFs, i.e. some source differs in nr from src[0]. + * + * This doesn't necessarily rule out the elimination of this instruction + * through register coalescing, but due to limitations of the register + * coalesce pass it might be impossible to do so directly until a later stage, + * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV + * instructions.
+ */ +inline bool +is_multi_copy_payload(const fs_inst *inst) { + if (is_copy_payload(VGRF, inst)) { + for (unsigned i = 0; i < inst->sources; i++) { + if (inst->src[i].nr != inst->src[0].nr) + return true; + } + } + + return false; +} + +/** + * Like is_identity_payload(), but the instruction must copy the whole of a + * single VGRF: src[0] has offset 0 and its allocated size is size_written. + * + * This means that there is a good chance that the instruction will be + * eliminated through register coalescing, but it's neither a necessary nor a + * sufficient condition for that to happen -- E.g. consider the case where + * source and destination registers diverge due to other instructions in the + * program overwriting part of their contents, which isn't something we can + * predict up front based on a cheap strictly local test of the copy + * instruction. + */ +inline bool +is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst) +{ + return is_identity_payload(VGRF, inst) && + inst->src[0].offset == 0 && + alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written; +} + #endif -- 2.30.2