From 9e1b3ea199c3bd01fe89e6ab3eee4cae3da92264 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Sun, 1 Nov 2015 03:10:21 -0500 Subject: [PATCH] i965/fs: add a pass for legalizing d2f We need to do this late, in order to avoid partial writes during the optimization loop. v2: Use subscript() instead of stride(). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + .../drivers/dri/i965/brw_fs_lower_d2f.cpp | 74 +++++++++++++++++++ 4 files changed, 81 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_fs_lower_d2f.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 0bb94bd5bc1..cb244ef4357 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -26,6 +26,7 @@ i965_compiler_FILES = \ brw_fs.h \ brw_fs_live_variables.cpp \ brw_fs_live_variables.h \ + brw_fs_lower_d2f.cpp \ brw_fs_lower_pack.cpp \ brw_fs_nir.cpp \ brw_fs_reg_allocate.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 96610ba4328..7097742115b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5359,6 +5359,11 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } + if (OPT(lower_d2f)) { + OPT(opt_copy_propagate); + OPT(dead_code_eliminate); + } + OPT(opt_combine_constants); OPT(lower_integer_multiplication); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5ed123aa226..b3265fd2f03 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -174,6 +174,7 @@ public: void lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_pack(); + bool lower_d2f(); bool lower_logical_sends(); bool lower_integer_multiplication(); bool lower_minmax(); diff --git 
a/src/mesa/drivers/dri/i965/brw_fs_lower_d2f.cpp b/src/mesa/drivers/dri/i965/brw_fs_lower_d2f.cpp new file mode 100644 index 00000000000..60bd943bfdb --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_lower_d2f.cpp @@ -0,0 +1,74 @@ +/* + * Copyright © 2015 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "brw_fs_builder.h"
+
+using namespace brw;
+
+/**
+ * Replace every MOV from a DF source to an F destination with a pair of
+ * MOVs that goes through a temporary.  Returns true if anything changed.
+ */
+bool
+fs_visitor::lower_d2f()
+{
+   bool changed = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      /* Skip everything except MOVs with an F destination and DF source. */
+      if (inst->opcode != BRW_OPCODE_MOV ||
+          inst->dst.type != BRW_REGISTER_TYPE_F ||
+          inst->src[0].type != BRW_REGISTER_TYPE_DF)
+         continue;
+
+      assert(inst->dst.file == VGRF);
+      assert(inst->saturate == false);
+
+      const fs_builder mov_bld(this, block, inst);
+      fs_reg final_dst = inst->dst;
+      /* The Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
+       * Single Precision Float", states:
+       *
+       *    The upper Dword of every Qword will be written with undefined
+       *    value when converting DF to F.
+       *
+       * The conversion therefore targets a two-register temporary through a
+       * strided view, and a second MOV gathers the lower DWord of every
+       * Qword, which holds the result, into the real destination.
+       */
+      fs_reg wide_tmp = mov_bld.vgrf(inst->src[0].type, 1);
+      fs_reg low_dwords = subscript(wide_tmp, inst->dst.type, 0);
+      mov_bld.MOV(low_dwords, inst->src[0]);
+      mov_bld.MOV(final_dst, low_dwords);
+
+      inst->remove(block);
+      changed = true;
+   }
+
+   if (changed)
+      invalidate_live_intervals();
+
+   return changed;
+}
-- 
2.30.2