From 497675c21ee34dfe1e8f9dfe62f6a3011f8062e5 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Wed, 10 Oct 2018 15:17:11 -0700
Subject: [PATCH] intel/fs: Fix nir_op_b2[fi] with 64-bit result on Gen8 LP and Gen9 LP

Several of the Atom GPUs have additional restrictions on alignment when
moving < 64-bit source to a 64-bit destination. All of the nir_op_*2*64
code generation paths respected this, but nir_op_b2[fi] did not.

Previous to commit a68dd47b911 it was not possible to generate such an
instruction from the GLSL path. It may have been possible from SPIR-V,
but it's not clear.

The aforementioned patch converts a 64-bit nir_op_fsign into a sequence
of operations including a nir_op_b2f with a 64-bit result. This "just
works" everywhere except these Atom parts.

This problem was not detected during normal CI testing because the Atom
parts are not included in developer builds.

v2 (idr): Make the patch compile, and make some cosmetic changes. Add a
commit message.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108319
Fixes: a68dd47b911 "nir/algebraic: Simplify fsat of fsign"
Reviewed-by: Ian Romanick
---
 src/intel/compiler/brw_fs_nir.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 12b087a5ec0..7930205d659 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -793,6 +793,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       inst->saturate = instr->dest.saturate;
       break;
 
+   case nir_op_b2i:
+   case nir_op_b2f:
+      op[0].type = BRW_REGISTER_TYPE_D;
+      op[0].negate = !op[0].negate;
+      /* fallthrough */
    case nir_op_f2f64:
    case nir_op_f2i64:
    case nir_op_f2u64:
@@ -1213,11 +1218,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_b2i:
-   case nir_op_b2f:
-      bld.MOV(result, negate(op[0]));
-      break;
-
    case nir_op_i2b:
    case nir_op_f2b: {
       uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
-- 
2.30.2