From c2019c6c261d5c46a4e5d3edc88836bcedf75f30 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 21 Jun 2016 17:18:04 -0700 Subject: [PATCH] i965: Use LZD to implement nir_op_ifind_msb on Gen < 7 v2: Retype LZD source as UD to avoid potential problems with 0x80000000. Suggested by Matt. Also update comment about problem values with LZD(abs(x)). Suggested by Curro. Signed-off-by: Ian Romanick Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 ++++++++++++++++---- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 57 +++++++++++++++++----- 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 11215562738..5bf9aa4bbc7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -621,8 +621,36 @@ emit_find_msb_using_lzd(const fs_builder &bld, bool is_signed) { fs_inst *inst; + fs_reg temp = src; - bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src); + if (is_signed) { + /* LZD of an absolute value source almost always does the right + * thing. There are two problem values: + * + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns + * 0. However, findMSB(int(0x80000000)) == 30. + * + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: + * + * For a value of zero or negative one, -1 will be returned. + * + * * Negative powers of two. LZD(abs(-(1<dest.dest) < 64); - bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ - bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + if (devinfo->gen < 7) { + emit_find_msb_using_lzd(bld, result, op[0], true); + } else { + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); - inst = bld.ADD(result, result, brw_imm_d(31)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; + /* FBH counts from the MSB side, while GLSL's findMSB() wants the + * count from the LSB side. If FBH didn't return an error + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB + * count into an LSB count. + */ + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + + inst = bld.ADD(result, result, brw_imm_d(31)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 352d88af479..85fa775dc6b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1000,8 +1000,36 @@ emit_find_msb_using_lzd(const vec4_builder &bld, bool is_signed) { vec4_instruction *inst; + src_reg temp = src; - bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src); + if (is_signed) { + /* LZD of an absolute value source almost always does the right + * thing. There are two problem values: + * + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns + * 0. However, findMSB(int(0x80000000)) == 30. + * + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: + * + * For a value of zero or negative one, -1 will be returned. + * + * * Negative powers of two. LZD(abs(-(1<predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; + if (devinfo->gen < 7) { + emit_find_msb_using_lzd(bld, dst, op[0], true); + } else { + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the + * count from the LSB side. If FBH didn't return an error + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB + * count into an LSB count. + */ + bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + + inst = bld.ADD(dst, src, brw_imm_d(31)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; + } break; } -- 2.30.2