From b065d8fb8cf55373bfdd80994417f1ac60976158 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 27 Nov 2019 16:26:03 -0800 Subject: [PATCH] nir/algebraic: Optimize some 64-bit integer comparisons involving zero I noticed that we can do better for these kinds of comparisons while working on the lowering for iadd_sat@64 and isub_sat@64. This eliminated 11 instructions from the fs-addSaturate-int64.shader_test. My hope is that this will improve the run-time of int64 tests on Ice Lake. I have no data to support or refute this. Unsurprisingly, no changes on shader-db. v2: Condition the min and max patterns with nir_lower_minmax64. Suggested by Caio. Very long discussion in the MR. :) Reviewed-by: Caio Marcelo de Oliveira Filho Tested-by: Marge Bot Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index c52522379c6..7b9a6a8e45d 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1063,6 +1063,24 @@ optimizations.extend([ 0x7fffffffffffffff)), '(options->lower_int64_options & nir_lower_iadd64) != 0'), + # These are done here instead of in the backend because the int64 lowering + # pass will make a mess of the patterns. The first patterns are + # conditioned on nir_lower_minmax64 because it was not clear that it was + # always an improvement on platforms that have real int64 support. No + # shaders in shader-db hit this, so it was hard to say one way or the + # other. 
+ (('ilt', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'), + (('ilt', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ilt', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'), + (('ige', ('imax(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imax', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'), + (('ige', ('imin(is_used_once)', 'a@64', 'b@64'), 0), ('ige', ('imin', ('unpack_64_2x32_split_y', a), ('unpack_64_2x32_split_y', b)), 0), '(options->lower_int64_options & nir_lower_minmax64) != 0'), + (('ilt', 'a@64', 0), ('ilt', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'), + (('ige', 'a@64', 0), ('ige', ('unpack_64_2x32_split_y', a), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'), + + (('ine', 'a@64', 0), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'), + (('ieq', 'a@64', 0), ('ieq', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'), + # 0u < uint(a) <=> uint(a) != 0u + (('ult', 0, 'a@64'), ('ine', ('ior', ('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_y', a)), 0), '(options->lower_int64_options & nir_lower_icmp64) != 0'), + # Alternative lowering that doesn't rely on bfi. (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), ('bcsel', ('ult', 31, 'bits'), -- 2.30.2