From aeecc82d057adf43189d08214b21ca5166ad9682 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= Date: Fri, 20 Jan 2017 08:47:05 +0100 Subject: [PATCH] i965/fs: generalize the legalization d2x pass MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Generalize it to lower any unsupported narrower conversion. v2 (Curro): - Add supports_type_conversion() - Reuse existing instruction instead of cloning it. - Generalize d2x to narrower and equal size conversions. v3 (Curro): - Make supports_type_conversion() const and improve it. - Use foreach_block_and_inst to process added instructions. - Simplify code. - Add assert and improve comments. - Remove redundant mov. - Remove useless comment. - Remove saturate == false assert and add support for saturation when fixing the conversion. - Add get_exec_type() function. v4 (Curro): - Use get_exec_type() function to get sources' type. Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_fs.cpp | 11 +-- src/intel/compiler/brw_fs_lower_d2x.cpp | 93 ++++++++++++++++--------- 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 086b1a04855..8eb8789905c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5694,11 +5694,6 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } - if (OPT(lower_d2x)) { - OPT(opt_copy_propagation); - OPT(dead_code_eliminate); - } - OPT(lower_simd_width); /* After SIMD lowering just in case we had to unroll the EOT send. 
*/ @@ -5745,6 +5740,12 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } + if (OPT(lower_d2x)) { + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + OPT(lower_simd_width); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/intel/compiler/brw_fs_lower_d2x.cpp b/src/intel/compiler/brw_fs_lower_d2x.cpp index a2db1154615..bc316360d05 100644 --- a/src/intel/compiler/brw_fs_lower_d2x.cpp +++ b/src/intel/compiler/brw_fs_lower_d2x.cpp @@ -27,48 +27,77 @@ using namespace brw; +static bool +supports_type_conversion(const fs_inst *inst) { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + case SHADER_OPCODE_MOV_INDIRECT: + return true; + case BRW_OPCODE_SEL: + return inst->dst.type == get_exec_type(inst); + default: + /* FIXME: We assume the opcodes not explicitly mentioned + * above just work fine with arbitrary conversions. + */ + return true; + } +} + bool fs_visitor::lower_d2x() { bool progress = false; - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->opcode != BRW_OPCODE_MOV) - continue; - - if (inst->dst.type != BRW_REGISTER_TYPE_F && - inst->dst.type != BRW_REGISTER_TYPE_D && - inst->dst.type != BRW_REGISTER_TYPE_UD) - continue; + foreach_block_and_inst(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + fs_reg dst = inst->dst; + bool saturate = inst->saturate; - if (inst->src[0].type != BRW_REGISTER_TYPE_DF && - inst->src[0].type != BRW_REGISTER_TYPE_UQ && - inst->src[0].type != BRW_REGISTER_TYPE_Q) - continue; + if (supports_type_conversion(inst)) { + if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) { + /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to + * Single Precision Float": + * + * The upper Dword of every Qword will be written with undefined + * value when converting DF to F. + * + * So we need to allocate a temporary that's two registers, and then do + * a strided MOV to get the lower DWord of every Qword that has the + * result. 
+ */ + fs_reg temp = ibld.vgrf(get_exec_type(inst)); + fs_reg strided_temp = subscript(temp, dst.type, 0); - assert(inst->dst.file == VGRF); - assert(inst->saturate == false); - fs_reg dst = inst->dst; + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = strided_temp; + inst->saturate = false; + /* As it is a strided destination, we write n times more, n being the + * size ratio between source and destination types. Update + * size_written accordingly. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); + ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate; - const fs_builder ibld(this, block, inst); + progress = true; + } + } else { + fs_reg temp0 = ibld.vgrf(get_exec_type(inst)); - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to - * Single Precision Float": - * - * The upper Dword of every Qword will be written with undefined - * value when converting DF to F. - * - * So we need to allocate a temporary that's two registers, and then do - * a strided MOV to get the lower DWord of every Qword that has the - * result. - */ - fs_reg temp = ibld.vgrf(inst->src[0].type, 1); - fs_reg strided_temp = subscript(temp, inst->dst.type, 0); - ibld.MOV(strided_temp, inst->src[0]); - ibld.MOV(dst, strided_temp); + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = temp0; + /* As it is a strided destination, we write n times more, n being the + * size ratio between source and destination types. Update + * size_written accordingly. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); + inst->saturate = false; + /* Now, do the conversion to original destination's type. In the next iteration, + * we will lower it if it is a d2f conversion. + */ + ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate; - inst->remove(block); - progress = true; + progress = true; + } } if (progress) -- 2.30.2