2 * Copyright © 2010 Intel Corporation
3 * Copyright © 2018 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
26 #include "nir_builder.h"
/**
 * NIR's home for miscellaneous ALU operation lowering implementations.
 *
 * Most NIR ALU lowering occurs in nir_opt_algebraic.py, since it is generally
 * easy to write the rules there.  However, if terms appear multiple times in
 * the lowered code, it can get very verbose and cause a lot of work for CSE,
 * so it may end up being easier to write out in C code.
 *
 * The shader must be in SSA for this pass.
 */
40 #define LOWER_MUL_HIGH (1 << 0)
43 lower_alu_instr(nir_alu_instr
*instr
, nir_builder
*b
)
45 nir_ssa_def
*lowered
= NULL
;
47 assert(instr
->dest
.dest
.is_ssa
);
49 b
->cursor
= nir_before_instr(&instr
->instr
);
50 b
->exact
= instr
->exact
;
53 case nir_op_bitfield_reverse
:
54 if (b
->shader
->options
->lower_bitfield_reverse
) {
55 /* For more details, see:
57 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
59 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
60 nir_ssa_def
*c2
= nir_imm_int(b
, 2);
61 nir_ssa_def
*c4
= nir_imm_int(b
, 4);
62 nir_ssa_def
*c8
= nir_imm_int(b
, 8);
63 nir_ssa_def
*c16
= nir_imm_int(b
, 16);
64 nir_ssa_def
*c33333333
= nir_imm_int(b
, 0x33333333);
65 nir_ssa_def
*c55555555
= nir_imm_int(b
, 0x55555555);
66 nir_ssa_def
*c0f0f0f0f
= nir_imm_int(b
, 0x0f0f0f0f);
67 nir_ssa_def
*c00ff00ff
= nir_imm_int(b
, 0x00ff00ff);
69 lowered
= nir_ssa_for_alu_src(b
, instr
, 0);
71 /* Swap odd and even bits. */
73 nir_iand(b
, nir_ushr(b
, lowered
, c1
), c55555555
),
74 nir_ishl(b
, nir_iand(b
, lowered
, c55555555
), c1
));
76 /* Swap consecutive pairs. */
78 nir_iand(b
, nir_ushr(b
, lowered
, c2
), c33333333
),
79 nir_ishl(b
, nir_iand(b
, lowered
, c33333333
), c2
));
83 nir_iand(b
, nir_ushr(b
, lowered
, c4
), c0f0f0f0f
),
84 nir_ishl(b
, nir_iand(b
, lowered
, c0f0f0f0f
), c4
));
88 nir_iand(b
, nir_ushr(b
, lowered
, c8
), c00ff00ff
),
89 nir_ishl(b
, nir_iand(b
, lowered
, c00ff00ff
), c8
));
92 nir_ushr(b
, lowered
, c16
),
93 nir_ishl(b
, lowered
, c16
));
97 case nir_op_bit_count
:
98 if (b
->shader
->options
->lower_bit_count
) {
99 /* For more details, see:
101 * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
103 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
104 nir_ssa_def
*c2
= nir_imm_int(b
, 2);
105 nir_ssa_def
*c4
= nir_imm_int(b
, 4);
106 nir_ssa_def
*c24
= nir_imm_int(b
, 24);
107 nir_ssa_def
*c33333333
= nir_imm_int(b
, 0x33333333);
108 nir_ssa_def
*c55555555
= nir_imm_int(b
, 0x55555555);
109 nir_ssa_def
*c0f0f0f0f
= nir_imm_int(b
, 0x0f0f0f0f);
110 nir_ssa_def
*c01010101
= nir_imm_int(b
, 0x01010101);
112 lowered
= nir_ssa_for_alu_src(b
, instr
, 0);
114 lowered
= nir_isub(b
, lowered
,
115 nir_iand(b
, nir_ushr(b
, lowered
, c1
), c55555555
));
117 lowered
= nir_iadd(b
,
118 nir_iand(b
, lowered
, c33333333
),
119 nir_iand(b
, nir_ushr(b
, lowered
, c2
), c33333333
));
121 lowered
= nir_ushr(b
,
126 nir_ushr(b
, lowered
, c4
)),
133 case nir_op_imul_high
:
134 case nir_op_umul_high
:
135 if (b
->shader
->options
->lower_mul_high
) {
136 nir_ssa_def
*src0
= nir_ssa_for_alu_src(b
, instr
, 0);
137 nir_ssa_def
*src1
= nir_ssa_for_alu_src(b
, instr
, 1);
138 if (src0
->bit_size
< 32) {
139 /* Just do the math in 32-bit space and shift the result */
140 nir_alu_type base_type
= nir_op_infos
[instr
->op
].output_type
;
141 nir_op upcast_op
= nir_type_conversion_op(base_type
| src0
->bit_size
, base_type
| 32, nir_rounding_mode_undef
);
142 nir_op downscast_op
= nir_type_conversion_op(base_type
| 32, base_type
| src0
->bit_size
, nir_rounding_mode_undef
);
144 nir_ssa_def
*src0_32
= nir_build_alu(b
, upcast_op
, src0
, NULL
, NULL
, NULL
);
145 nir_ssa_def
*src1_32
= nir_build_alu(b
, upcast_op
, src1
, NULL
, NULL
, NULL
);
146 nir_ssa_def
*dest_32
= nir_imul(b
, src0_32
, src1_32
);
147 nir_ssa_def
*dest_shifted
= nir_ishr(b
, dest_32
, nir_imm_int(b
, src0
->bit_size
));
148 lowered
= nir_build_alu(b
, downscast_op
, dest_shifted
, NULL
, NULL
, NULL
);
150 nir_ssa_def
*c1
= nir_imm_intN_t(b
, 1, src0
->bit_size
);
151 nir_ssa_def
*cshift
= nir_imm_int(b
, src0
->bit_size
/ 2);
152 nir_ssa_def
*cmask
= nir_imm_intN_t(b
, (1ull << (src0
->bit_size
/ 2)) - 1, src0
->bit_size
);
153 nir_ssa_def
*different_signs
= NULL
;
154 if (instr
->op
== nir_op_imul_high
) {
155 nir_ssa_def
*c0
= nir_imm_intN_t(b
, 0, src0
->bit_size
);
156 different_signs
= nir_ixor(b
,
157 nir_ilt(b
, src0
, c0
),
158 nir_ilt(b
, src1
, c0
));
159 src0
= nir_iabs(b
, src0
);
160 src1
= nir_iabs(b
, src1
);
166 * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
168 * Start by splitting into the 4 multiplies.
170 nir_ssa_def
*src0l
= nir_iand(b
, src0
, cmask
);
171 nir_ssa_def
*src1l
= nir_iand(b
, src1
, cmask
);
172 nir_ssa_def
*src0h
= nir_ushr(b
, src0
, cshift
);
173 nir_ssa_def
*src1h
= nir_ushr(b
, src1
, cshift
);
175 nir_ssa_def
*lo
= nir_imul(b
, src0l
, src1l
);
176 nir_ssa_def
*m1
= nir_imul(b
, src0l
, src1h
);
177 nir_ssa_def
*m2
= nir_imul(b
, src0h
, src1l
);
178 nir_ssa_def
*hi
= nir_imul(b
, src0h
, src1h
);
182 tmp
= nir_ishl(b
, m1
, cshift
);
183 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
184 lo
= nir_iadd(b
, lo
, tmp
);
185 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m1
, cshift
));
187 tmp
= nir_ishl(b
, m2
, cshift
);
188 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
189 lo
= nir_iadd(b
, lo
, tmp
);
190 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m2
, cshift
));
192 if (instr
->op
== nir_op_imul_high
) {
193 /* For channels where different_signs is set we have to perform a
194 * 64-bit negation. This is *not* the same as just negating the
195 * high 32-bits. Consider -3 * 2. The high 32-bits is 0, but the
196 * desired result is -1, not -0! Recall -x == ~x + 1.
198 hi
= nir_bcsel(b
, different_signs
,
205 nir_imm_intN_t(b
, 1, src0
->bit_size
))),
219 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
, nir_src_for_ssa(lowered
));
220 nir_instr_remove(&instr
->instr
);
228 nir_lower_alu(nir_shader
*shader
)
230 bool progress
= false;
232 if (!shader
->options
->lower_bitfield_reverse
&&
233 !shader
->options
->lower_mul_high
)
236 nir_foreach_function(function
, shader
) {
237 if (function
->impl
) {
239 nir_builder_init(&builder
, function
->impl
);
241 nir_foreach_block(block
, function
->impl
) {
242 nir_foreach_instr_safe(instr
, block
) {
243 if (instr
->type
== nir_instr_type_alu
) {
244 progress
= lower_alu_instr(nir_instr_as_alu(instr
),
245 &builder
) || progress
;
251 nir_metadata_preserve(function
->impl
,
252 nir_metadata_block_index
|
253 nir_metadata_dominance
);