From 165b7f3a4487e7ab7738dd9b37f2b3375692f8a2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Sat, 26 Jan 2019 09:12:46 +0100 Subject: [PATCH] nir: define behavior of nir_op_bfm and nir_op_u/ibfe according to SM5 spec. That is: the five least significant bits provide the values of 'bits' and 'offset' which is the case for all hardware currently supported by NIR and using the bfm/bfe instructions. This patch also changes the lowering of bitfield_insert/extract using shifts to not use bfm and removes the flag 'lower_bfm'. Tested-by: Eric Anholt Reviewed-by: Connor Abbott --- src/broadcom/compiler/nir_to_vir.c | 1 - src/compiler/nir/nir.h | 6 ++-- src/compiler/nir/nir_opcodes.py | 28 ++++++++----------- src/compiler/nir/nir_opt_algebraic.py | 15 +++------- src/freedreno/ir3/ir3_nir.c | 2 -- .../drivers/nouveau/nvc0/nvc0_screen.c | 1 - 6 files changed, 18 insertions(+), 35 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 242414983e1..cb58b737a80 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2392,7 +2392,6 @@ const nir_shader_compiler_options v3d_nir_options = { .lower_all_io_to_temps = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_bfm = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, .lower_bitfield_reverse = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0b3745be8b1..9feed169e47 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2282,18 +2282,16 @@ typedef struct nir_shader_compiler_options { bool lower_fmod; /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */ bool lower_bitfield_extract; - /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */ + /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ bool lower_bitfield_extract_to_shifts; /** Lowers bitfield_insert to bfi/bfm */ bool lower_bitfield_insert; - /** Lowers bitfield_insert to bfm, compares, and shifts. */ + /** Lowers bitfield_insert to compares, and shifts. */ bool lower_bitfield_insert_to_shifts; /** Lowers bitfield_reverse to shifts. */ bool lower_bitfield_reverse; /** Lowers bit_count to shifts. */ bool lower_bit_count; - /** Lowers bfm to shifts and subtracts. */ - bool lower_bfm; /** Lowers ifind_msb to compare and ufind_msb */ bool lower_ifind_msb; /** Lowers find_lsb to ufind_msb and logic ops */ diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 1ab4a3e7a31..8771c74b033 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -787,14 +787,12 @@ binop_convert("pack_32_2x16_split", tuint32, tuint16, "", "src0 | ((uint32_t)src1 << 16)") # bfm implements the behavior of the first operation of the SM5 "bfi" assembly -# and that of the "bfi1" i965 instruction. That is, it has undefined behavior -# if either of its arguments are 32. +# and that of the "bfi1" i965 instruction. That is, the bits and offset values +# are from the low five bits of src0 and src1, respectively. binop_convert("bfm", tuint32, tint32, "", """ -int bits = src0, offset = src1; -if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) - dst = 0; /* undefined */ -else - dst = ((1u << bits) - 1) << offset; +int bits = src0 & 0x1F; +int offset = src1 & 0x1F; +dst = ((1u << bits) - 1) << offset; """) opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """ @@ -873,15 +871,14 @@ if (mask == 0) { } """) -# SM5 ubfe/ibfe assembly +# SM5 ubfe/ibfe assembly: only the 5 least significant bits of offset and bits are used. opcode("ubfe", 0, tuint32, - [0, 0, 0], [tuint32, tint32, tint32], False, "", """ + [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """ unsigned base = src0; -int offset = src1, bits = src2; +unsigned offset = src1 & 0x1F; +unsigned bits = src2 & 0x1F; if (bits == 0) { dst = 0; -} else if (bits < 0 || offset < 0) { - dst = 0; /* undefined */ } else if (offset + bits < 32) { dst = (base << (32 - bits - offset)) >> (32 - bits); } else { @@ -889,13 +886,12 @@ if (bits == 0) { } """) opcode("ibfe", 0, tint32, - [0, 0, 0], [tint32, tint32, tint32], False, "", """ + [0, 0, 0], [tint32, tuint32, tuint32], False, "", """ int base = src0; -int offset = src1, bits = src2; +unsigned offset = src1 & 0x1F; +unsigned bits = src2 & 0x1F; if (bits == 0) { dst = 0; -} else if (bits < 0 || offset < 0) { - dst = 0; /* undefined */ } else if (offset + bits < 32) { dst = (base << (32 - bits - offset)) >> (32 - bits); } else { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 36e576b7eff..c7e69c0310c 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -794,18 +794,11 @@ optimizations.extend([ # Alternative lowering that doesn't rely on bfi. (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), - ('bcsel', ('ilt', 31, 'bits'), - 'insert', - ('ior', - ('iand', 'base', ('inot', ('bfm', 'bits', 'offset'))), - ('iand', ('ishl', 'insert', 'offset'), ('bfm', 'bits', 'offset')))), + ('ior', + ('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))), + ('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))), 'options->lower_bitfield_insert_to_shifts'), - # bfm lowering -- note that the NIR opcode is undefined if either arg is 32. - (('bfm', 'bits', 'offset'), - ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'), - 'options->lower_bfm'), - (('ibitfield_extract', 'value', 'offset', 'bits'), ('bcsel', ('ilt', 31, 'bits'), 'value', ('ibfe', 'value', 'offset', 'bits')), @@ -829,7 +822,7 @@ optimizations.extend([ ('ushr', 'value', 'offset'), ('bcsel', ('ieq', 'bits', 32), 0xffffffff, - ('bfm', 'bits', 0))), + ('isub', ('ishl', 1, 'bits'), 1))), 'options->lower_bitfield_extract_to_shifts'), (('ifind_msb', 'value'), diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 1ef47aa3e06..320e485b98d 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -54,7 +54,6 @@ static const nir_shader_compiler_options options = { .lower_helper_invocation = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, - .lower_bfm = true, .use_interpolated_input_intrinsics = true, }; @@ -79,7 +78,6 @@ static const nir_shader_compiler_options options_a6xx = { .lower_helper_invocation = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, - .lower_bfm = true, .use_interpolated_input_intrinsics = true, }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 527d5a8108a..4d26568d391 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -912,7 +912,6 @@ static const nir_shader_compiler_options nir_options = { .lower_bitfield_insert_to_shifts = false, .lower_bitfield_reverse = false, .lower_bit_count = false, - .lower_bfm = false, .lower_ifind_msb = false, .lower_find_lsb = false, .lower_uadd_carry = true, // TODO -- 2.30.2