From 0177dbb6c2fe876a9761a4a97eec44accfa4c007 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 13 Jun 2016 03:13:23 -0400 Subject: [PATCH] i965/fs: Fix single-precision to double-precision conversions for CHV/BSW/BXT From the Cherryview PRM, Volume 7, 3D Media GPGPU Engine, Register Region Restrictions: "When source or destination is 64b (...), regioning in Align1 must follow these rules: 1. Source and destination horizontal stride must be aligned to the same qword. (...)" v2: - Fix it for Broxton too. v3: - Remove inst->regs_written change as it is not necessary (Ken) Cc: "12.0" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 Tested-by: Mark Janes Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index b8119534b2b..d72b37b5854 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -715,10 +715,35 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_u2f: if (optimize_extract_to_float(instr, result)) return; + inst = bld.MOV(result, op[0]); + inst->saturate = instr->dest.saturate; + break; case nir_op_f2d: case nir_op_i2d: case nir_op_u2d: + /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: + * + * "When source or destination is 64b (...), regioning in Align1 + * must follow these rules: + * + * 1. Source and destination horizontal stride must be aligned to + * the same qword. + * (...)" + * + * This means that 32-bit to 64-bit conversions need to have the 32-bit + * data elements aligned to 64-bit. This restriction applies to CHV and + * BXT, but not to BDW. 
+ */ + if (devinfo->is_cherryview || devinfo->is_broxton) { + fs_reg tmp = bld.vgrf(result.type, 1); + tmp = subscript(tmp, op[0].type, 0); + inst = bld.MOV(tmp, op[0]); + inst = bld.MOV(result, tmp); + inst->saturate = instr->dest.saturate; + break; + } + /* fallthrough */ case nir_op_d2f: case nir_op_d2i: case nir_op_d2u: -- 2.30.2