/*
 * Copyright © 2010 Intel Corporation
 * Copyright © 2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
26 #include "nir_builder.h"
/** @file nir_lower_alu.c
 *
 * NIR's home for miscellaneous ALU operation lowering implementations.
 *
 * Most NIR ALU lowering occurs in nir_opt_algebraic.py, since it's generally
 * easy to write them there.  However, if terms appear multiple times in the
 * lowered code, it can get very verbose and cause a lot of work for CSE, so
 * it may end up being easier to write out in C code.
 *
 * The shader must be in SSA for this pass.
 */
40 #define LOWER_MUL_HIGH (1 << 0)
43 lower_alu_instr(nir_alu_instr
*instr
, nir_builder
*b
)
45 nir_ssa_def
*lowered
= NULL
;
47 assert(instr
->dest
.dest
.is_ssa
);
49 b
->cursor
= nir_before_instr(&instr
->instr
);
50 b
->exact
= instr
->exact
;
53 case nir_op_bitfield_reverse
:
54 if (b
->shader
->options
->lower_bitfield_reverse
) {
55 /* For more details, see:
57 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
59 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
60 nir_ssa_def
*c2
= nir_imm_int(b
, 2);
61 nir_ssa_def
*c4
= nir_imm_int(b
, 4);
62 nir_ssa_def
*c8
= nir_imm_int(b
, 8);
63 nir_ssa_def
*c16
= nir_imm_int(b
, 16);
64 nir_ssa_def
*c33333333
= nir_imm_int(b
, 0x33333333);
65 nir_ssa_def
*c55555555
= nir_imm_int(b
, 0x55555555);
66 nir_ssa_def
*c0f0f0f0f
= nir_imm_int(b
, 0x0f0f0f0f);
67 nir_ssa_def
*c00ff00ff
= nir_imm_int(b
, 0x00ff00ff);
69 lowered
= nir_ssa_for_alu_src(b
, instr
, 0);
71 /* Swap odd and even bits. */
73 nir_iand(b
, nir_ushr(b
, lowered
, c1
), c55555555
),
74 nir_ishl(b
, nir_iand(b
, lowered
, c55555555
), c1
));
76 /* Swap consecutive pairs. */
78 nir_iand(b
, nir_ushr(b
, lowered
, c2
), c33333333
),
79 nir_ishl(b
, nir_iand(b
, lowered
, c33333333
), c2
));
83 nir_iand(b
, nir_ushr(b
, lowered
, c4
), c0f0f0f0f
),
84 nir_ishl(b
, nir_iand(b
, lowered
, c0f0f0f0f
), c4
));
88 nir_iand(b
, nir_ushr(b
, lowered
, c8
), c00ff00ff
),
89 nir_ishl(b
, nir_iand(b
, lowered
, c00ff00ff
), c8
));
92 nir_ushr(b
, lowered
, c16
),
93 nir_ishl(b
, lowered
, c16
));
97 case nir_op_bit_count
:
98 if (b
->shader
->options
->lower_bit_count
) {
99 /* For more details, see:
101 * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
103 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
104 nir_ssa_def
*c2
= nir_imm_int(b
, 2);
105 nir_ssa_def
*c4
= nir_imm_int(b
, 4);
106 nir_ssa_def
*c24
= nir_imm_int(b
, 24);
107 nir_ssa_def
*c33333333
= nir_imm_int(b
, 0x33333333);
108 nir_ssa_def
*c55555555
= nir_imm_int(b
, 0x55555555);
109 nir_ssa_def
*c0f0f0f0f
= nir_imm_int(b
, 0x0f0f0f0f);
110 nir_ssa_def
*c01010101
= nir_imm_int(b
, 0x01010101);
112 lowered
= nir_ssa_for_alu_src(b
, instr
, 0);
114 lowered
= nir_isub(b
, lowered
,
115 nir_iand(b
, nir_ushr(b
, lowered
, c1
), c55555555
));
117 lowered
= nir_iadd(b
,
118 nir_iand(b
, lowered
, c33333333
),
119 nir_iand(b
, nir_ushr(b
, lowered
, c2
), c33333333
));
121 lowered
= nir_ushr(b
,
126 nir_ushr(b
, lowered
, c4
)),
133 case nir_op_imul_high
:
134 case nir_op_umul_high
:
135 if (b
->shader
->options
->lower_mul_high
) {
136 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
137 nir_ssa_def
*c16
= nir_imm_int(b
, 16);
139 nir_ssa_def
*src0
= nir_ssa_for_alu_src(b
, instr
, 0);
140 nir_ssa_def
*src1
= nir_ssa_for_alu_src(b
, instr
, 1);
141 nir_ssa_def
*different_signs
= NULL
;
142 if (instr
->op
== nir_op_imul_high
) {
143 nir_ssa_def
*c0
= nir_imm_int(b
, 0);
144 different_signs
= nir_ixor(b
,
145 nir_ilt(b
, src0
, c0
),
146 nir_ilt(b
, src1
, c0
));
147 src0
= nir_iabs(b
, src0
);
148 src1
= nir_iabs(b
, src1
);
154 * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
156 * Start by splitting into the 4 multiplies.
158 nir_ssa_def
*src0l
= nir_iand(b
, src0
, nir_imm_int(b
, 0xffff));
159 nir_ssa_def
*src1l
= nir_iand(b
, src1
, nir_imm_int(b
, 0xffff));
160 nir_ssa_def
*src0h
= nir_ushr(b
, src0
, c16
);
161 nir_ssa_def
*src1h
= nir_ushr(b
, src1
, c16
);
163 nir_ssa_def
*lo
= nir_imul(b
, src0l
, src1l
);
164 nir_ssa_def
*m1
= nir_imul(b
, src0l
, src1h
);
165 nir_ssa_def
*m2
= nir_imul(b
, src0h
, src1l
);
166 nir_ssa_def
*hi
= nir_imul(b
, src0h
, src1h
);
170 tmp
= nir_ishl(b
, m1
, c16
);
171 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
172 lo
= nir_iadd(b
, lo
, tmp
);
173 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m1
, c16
));
175 tmp
= nir_ishl(b
, m2
, c16
);
176 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
177 lo
= nir_iadd(b
, lo
, tmp
);
178 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m2
, c16
));
180 if (instr
->op
== nir_op_imul_high
) {
181 /* For channels where different_signs is set we have to perform a
182 * 64-bit negation. This is *not* the same as just negating the
183 * high 32-bits. Consider -3 * 2. The high 32-bits is 0, but the
184 * desired result is -1, not -0! Recall -x == ~x + 1.
186 hi
= nir_bcsel(b
, different_signs
,
206 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
, nir_src_for_ssa(lowered
));
207 nir_instr_remove(&instr
->instr
);
215 nir_lower_alu(nir_shader
*shader
)
217 bool progress
= false;
219 if (!shader
->options
->lower_bitfield_reverse
&&
220 !shader
->options
->lower_mul_high
)
223 nir_foreach_function(function
, shader
) {
224 if (function
->impl
) {
226 nir_builder_init(&builder
, function
->impl
);
228 nir_foreach_block(block
, function
->impl
) {
229 nir_foreach_instr_safe(instr
, block
) {
230 if (instr
->type
== nir_instr_type_alu
) {
231 progress
= lower_alu_instr(nir_instr_as_alu(instr
),
232 &builder
) || progress
;
238 nir_metadata_preserve(function
->impl
,
239 nir_metadata_block_index
|
240 nir_metadata_dominance
);