From 2a76f03c90037a8966eeb7e47d86c11a6ada9312 Mon Sep 17 00:00:00 2001
From: Jose Maria Casanova Crespo
Date: Thu, 3 May 2018 01:44:11 +0200
Subject: [PATCH] intel/compiler: fix 16-bit int brw_negate_immediate and brw_abs_immediate

From Intel Skylake PRM, vol 07, "Immediate" section (page 768):

"For a word, unsigned word, or half-float immediate data,
software must replicate the same 16-bit immediate value to both
the lower word and the high word of the 32-bit immediate field
in a GEN instruction."

This fixes the int16/uint16 negate and abs immediates that weren't
taking into account the replication in lower and upper words.

v2: Integer cases are different to Float cases. (Jason Ekstrand)
    Included reference to PRM (Jose Maria Casanova)
v3: Make explicit uint32_t casting for left shift (Jason Ekstrand)
    Split half float implementation. (Jason Ekstrand)
    Fix brw_abs_immediate (Jose Maria Casanova)

Cc: "18.0 18.1"
Reviewed-by: Jason Ekstrand
---
 src/intel/compiler/brw_shader.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 9cdf9fcb23d..284c2e8233c 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -580,9 +580,11 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
       reg->d = -reg->d;
       return true;
    case BRW_REGISTER_TYPE_W:
-   case BRW_REGISTER_TYPE_UW:
-      reg->d = -(int16_t)reg->ud;
+   case BRW_REGISTER_TYPE_UW: {
+      uint16_t value = -(int16_t)reg->ud;
+      reg->ud = value | (uint32_t)value << 16;
       return true;
+   }
    case BRW_REGISTER_TYPE_F:
       reg->f = -reg->f;
       return true;
@@ -618,9 +620,11 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
    case BRW_REGISTER_TYPE_D:
       reg->d = abs(reg->d);
       return true;
-   case BRW_REGISTER_TYPE_W:
-      reg->d = abs((int16_t)reg->ud);
+   case BRW_REGISTER_TYPE_W: {
+      uint16_t value = abs((int16_t)reg->ud);
+      reg->ud = value | (uint32_t)value << 16;
       return true;
+   }
    case BRW_REGISTER_TYPE_F:
       reg->f = fabsf(reg->f);
       return true;
-- 
2.30.2