From: Rhys Perry
Date: Wed, 9 Oct 2019 14:27:07 +0000 (+0100)
Subject: nir/algebraic: add some half packing optimizations
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1f72857739beed55276f263f49b3802c336b8c58;p=mesa.git

nir/algebraic: add some half packing optimizations

pipeline-db (ACO):
Totals from affected shaders:
SGPRS: 29200 -> 29200 (0.00 %)
VGPRS: 17372 -> 17372 (0.00 %)
Spilled SGPRs: 105 -> 105 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 1406576 -> 1389256 (-1.23 %) bytes
LDS: 83 -> 83 (0.00 %) blocks
Max Waves: 3976 -> 3976 (0.00 %)

pipeline-db (LLVM):
Totals from affected shaders:
SGPRS: 21320 -> 21320 (0.00 %)
VGPRS: 17056 -> 17036 (-0.12 %)
Spilled SGPRs: 22 -> 22 (0.00 %)
Spilled VGPRs: 503 -> 487 (-3.18 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 396 -> 396 (0.00 %) dwords per thread
Code Size: 1441244 -> 1423292 (-1.25 %) bytes
LDS: 463 -> 463 (0.00 %) blocks
Max Waves: 3609 -> 3611 (0.06 %)

v2: add pattern for ishr

Signed-off-by: Rhys Perry
Reviewed-by: Connor Abbott
Tested-by: Marge Bot
Part-of:
---

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index fd0007bb54d..f9fc119505f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -935,6 +935,15 @@ optimizations.extend([
    (('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)),
    (('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)),
    (('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)),
+
+   # Optimize half packing
+   (('ishl', ('pack_half_2x16', ('vec2', a, 0)), 16), ('pack_half_2x16', ('vec2', 0, a))),
+   (('ishr', ('pack_half_2x16', ('vec2', 0, a)), 16), ('pack_half_2x16', ('vec2', a, 0))),
+
+   (('iadd', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
+    ('pack_half_2x16', ('vec2', a, b))),
+   (('ior', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
+    ('pack_half_2x16', ('vec2', a, b))),
 ])
 
 # After the ('extract_u8', a, 0) pattern, above, triggers, there will be