2 * Copyright © 2010 Intel Corporation
3 * Copyright © 2018 Broadcom
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
26 #include "nir_builder.h"
30 * NIR's home for miscellaneous ALU operation lowering implementations.
32 * Most NIR ALU lowering occurs in nir_opt_algebraic.py, since it's generally
33 * easy to write them there. However, if terms appear multiple times in the
34 * lowered code, it can get very verbose and cause a lot of work for CSE, so
35 * it may end up being easier to write out in C code.
37 * The shader must be in SSA for this pass.
40 #define LOWER_MUL_HIGH (1 << 0)
43 lower_alu_instr(nir_alu_instr
*instr
, nir_builder
*b
)
45 nir_ssa_def
*lowered
= NULL
;
47 assert(instr
->dest
.dest
.is_ssa
);
49 b
->cursor
= nir_before_instr(&instr
->instr
);
50 b
->exact
= instr
->exact
;
53 case nir_op_imul_high
:
54 case nir_op_umul_high
:
55 if (b
->shader
->options
->lower_mul_high
) {
56 nir_ssa_def
*c1
= nir_imm_int(b
, 1);
57 nir_ssa_def
*c16
= nir_imm_int(b
, 16);
59 nir_ssa_def
*src0
= nir_ssa_for_alu_src(b
, instr
, 0);
60 nir_ssa_def
*src1
= nir_ssa_for_alu_src(b
, instr
, 1);
61 nir_ssa_def
*different_signs
= NULL
;
62 if (instr
->op
== nir_op_imul_high
) {
63 nir_ssa_def
*c0
= nir_imm_int(b
, 0);
64 different_signs
= nir_ixor(b
,
66 nir_ilt(b
, src1
, c0
));
67 src0
= nir_iabs(b
, src0
);
68 src1
= nir_iabs(b
, src1
);
74 * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
76 * Start by splitting into the 4 multiplies.
78 nir_ssa_def
*src0l
= nir_iand(b
, src0
, nir_imm_int(b
, 0xffff));
79 nir_ssa_def
*src1l
= nir_iand(b
, src1
, nir_imm_int(b
, 0xffff));
80 nir_ssa_def
*src0h
= nir_ushr(b
, src0
, c16
);
81 nir_ssa_def
*src1h
= nir_ushr(b
, src1
, c16
);
83 nir_ssa_def
*lo
= nir_imul(b
, src0l
, src1l
);
84 nir_ssa_def
*m1
= nir_imul(b
, src0l
, src1h
);
85 nir_ssa_def
*m2
= nir_imul(b
, src0h
, src1l
);
86 nir_ssa_def
*hi
= nir_imul(b
, src0h
, src1h
);
90 tmp
= nir_ishl(b
, m1
, c16
);
91 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
92 lo
= nir_iadd(b
, lo
, tmp
);
93 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m1
, c16
));
95 tmp
= nir_ishl(b
, m2
, c16
);
96 hi
= nir_iadd(b
, hi
, nir_iand(b
, nir_uadd_carry(b
, lo
, tmp
), c1
));
97 lo
= nir_iadd(b
, lo
, tmp
);
98 hi
= nir_iadd(b
, hi
, nir_ushr(b
, m2
, c16
));
100 if (instr
->op
== nir_op_imul_high
) {
101 /* For channels where different_signs is set we have to perform a
102 * 64-bit negation. This is *not* the same as just negating the
103 * high 32-bits. Consider -3 * 2. The high 32-bits is 0, but the
104 * desired result is -1, not -0! Recall -x == ~x + 1.
106 hi
= nir_bcsel(b
, different_signs
,
126 nir_ssa_def_rewrite_uses(&instr
->dest
.dest
.ssa
, nir_src_for_ssa(lowered
));
127 nir_instr_remove(&instr
->instr
);
135 nir_lower_alu(nir_shader
*shader
)
137 bool progress
= false;
139 if (!shader
->options
->lower_mul_high
)
142 nir_foreach_function(function
, shader
) {
143 if (function
->impl
) {
145 nir_builder_init(&builder
, function
->impl
);
147 nir_foreach_block(block
, function
->impl
) {
148 nir_foreach_instr_safe(instr
, block
) {
149 if (instr
->type
== nir_instr_type_alu
) {
150 progress
= lower_alu_instr(nir_instr_as_alu(instr
),
151 &builder
) || progress
;
157 nir_metadata_preserve(function
->impl
,
158 nir_metadata_block_index
|
159 nir_metadata_dominance
);