2 * Copyright © 2019 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Rhys Perry (pendingchaos02@gmail.com)
31 #include "aco_builder.h"
/* Bookkeeping members that the functions below reach through `state->...`
 * (the parameter type is spelled `ssa_state` in their signatures).
 * NOTE(review): the enclosing struct header and the declaration of the
 * `needs_init` member used further down are not visible in this extract. */
/* Set to true once all_preds_uniform has been computed for the current block;
 * lower_phis() resets it to false for every block. */
38 bool checked_preds_for_uniform
;
/* True when the block is not a merge block and every logical predecessor has
 * block_kind_uniform set (computed in lower_divergent_bool_phi()). */
39 bool all_preds_uniform
;
/* Bit mask of undefined phi operands recorded the last time the state was
 * (re)initialised in lower_divergent_bool_phi(). */
42 uint64_t cur_undef_operands
;
/* Index of the block whose phi is currently being lowered. */
44 unsigned phi_block_idx
;
/* Loop nest depth of that block, incremented by one when the block is a loop
 * exit; get_ssa() compares other blocks' depth against it. */
45 unsigned loop_nest_depth
;
/* Maps a predecessor block index to the id of the temporary allocated for the
 * value written in that block. */
46 std::map
<unsigned, unsigned> writes
;
/* Per-block operand cached by get_ssa(); indexed by block index. */
47 std::vector
<Operand
> latest
;
/* Per-block flag telling get_ssa() that `latest` already holds an entry. */
48 std::vector
<bool> visited
;
/* Looks up the lane-mask operand associated with block `block_idx`.
 *
 * The lookup first consults state->writes and the state->latest cache and
 * otherwise recurses through other blocks (the previous block index or the
 * linear predecessors). For the general multi-predecessor case a
 * p_linear_phi over the predecessors' operands is placed at the start of
 * the block.
 *
 * program      - program whose blocks are inspected and possibly modified
 * block_idx    - index into program->blocks
 * state        - bookkeeping shared between calls (writes/latest/visited, ...)
 * before_write - NOTE(review): not referenced in the lines visible here; the
 *                embedded numbering skips 52-53, so the guard that uses it is
 *                presumably missing from this extract - confirm upstream.
 */
51 Operand
get_ssa(Program
*program
, unsigned block_idx
, ssa_state
*state
, bool before_write
)
/* A temporary registered in state->writes for this block takes priority. */
54 auto it
= state
->writes
.find(block_idx
);
55 if (it
!= state
->writes
.end())
56 return Operand(Temp(it
->second
, program
->lane_mask
));
/* Otherwise reuse the operand cached by an earlier visit. */
57 if (state
->visited
[block_idx
])
58 return state
->latest
[block_idx
];
/* Mark the block before recursing so that a later call reaching it again
 * takes the cached entry above. */
61 state
->visited
[block_idx
] = true;
63 Block
& block
= program
->blocks
[block_idx
];
64 size_t pred
= block
.linear_preds
.size();
/* No linear predecessors, or the block is less deeply nested than the depth
 * stored in the state: answer with an operand built from the lane-mask
 * register class only (presumably ACO's undefined operand - confirm). */
65 if (pred
== 0 || block
.loop_nest_depth
< state
->loop_nest_depth
) {
66 return Operand(program
->lane_mask
);
/* More deeply nested than the stored depth: take the value from the block
 * with the preceding index and cache it.
 * NOTE(review): the return of `op` is not visible (numbering skips 70). */
67 } else if (block
.loop_nest_depth
> state
->loop_nest_depth
) {
68 Operand op
= get_ssa(program
, block_idx
- 1, state
, false);
69 state
->latest
[block_idx
] = op
;
/* Single predecessor or loop exit: forward the first linear predecessor's
 * value and cache it.
 * NOTE(review): the return of `op` is not visible (numbering skips 74). */
71 } else if (pred
== 1 || block
.kind
& block_kind_loop_exit
) {
72 Operand op
= get_ssa(program
, block
.linear_preds
[0], state
, false);
73 state
->latest
[block_idx
] = op
;
/* Loop header reached while the phi's own block is not a loop exit: answer
 * with the register-class-only operand again. */
75 } else if (block
.kind
& block_kind_loop_header
&&
76 !(program
->blocks
[state
->phi_block_idx
].kind
& block_kind_loop_exit
)) {
77 return Operand(program
->lane_mask
);
/* General case: allocate the result temporary and publish it in the cache
 * before recursing into the predecessors.
 * NOTE(review): the `else` opening this branch is not visible (numbering
 * skips 78). */
79 Temp res
= Temp(program
->allocateId(), program
->lane_mask
);
80 state
->latest
[block_idx
] = Operand(res
);
/* Collect one operand per linear predecessor.
 * NOTE(review): the declaration of `ops` is not visible (numbering skips 82). */
83 for (unsigned i
= 0; i
< pred
; i
++)
84 ops
[i
] = get_ssa(program
, block
.linear_preds
[i
], state
, false);
/* all_undef stays true only if every collected operand is undefined. */
86 bool all_undef
= true;
87 for (unsigned i
= 0; i
< pred
; i
++)
88 all_undef
= all_undef
&& ops
[i
].isUndefined();
/* NOTE(review): presumably guarded by `if (all_undef)` and followed by an
 * early return; neither is visible here (numbering skips 89 and 91-93). */
90 state
->latest
[block_idx
] = ops
[0];
/* Build a p_linear_phi with one operand per linear predecessor that defines
 * `res`, and put it at the very beginning of the block. */
94 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, pred
, 1)};
95 for (unsigned i
= 0; i
< pred
; i
++)
96 phi
->operands
[i
] = ops
[i
];
97 phi
->definitions
[0] = Definition(res
);
98 block
.instructions
.emplace(block
.instructions
.begin(), std::move(phi
));
/* Inserts `instr` into `block` directly in front of its p_logical_end
 * instruction. If the block has no p_logical_end, the instruction is placed
 * in front of the block's last instruction, which is asserted to be a branch
 * (Format::PSEUDO_BRANCH).
 *
 * block - block receiving the instruction
 * instr - instruction to insert; ownership is moved into the block
 */
104 void insert_before_logical_end(Block
*block
, aco_ptr
<Instruction
> instr
)
106 auto IsLogicalEnd
= [] (const aco_ptr
<Instruction
>& instr
) -> bool {
107 return instr
->opcode
== aco_opcode::p_logical_end
;
/* Search from the back of the instruction list. */
109 auto it
= std::find_if(block
->instructions
.crbegin(), block
->instructions
.crend(), IsLogicalEnd
);
/* Not found: insert before the final (branch) instruction. */
111 if (it
== block
->instructions
.crend()) {
112 assert(block
->instructions
.back()->format
== Format::PSEUDO_BRANCH
);
113 block
->instructions
.insert(std::prev(block
->instructions
.end()), std::move(instr
));
/* Found: std::prev(it.base()) is the forward iterator designating the
 * element the reverse iterator refers to, so the insert lands right before
 * p_logical_end.
 * NOTE(review): the `else` separating the two inserts is not visible
 * (numbering skips 114). */
115 block
->instructions
.insert(std::prev(it
.base()), std::move(instr
));
/* Emits, in front of `block`'s p_logical_end, the scalar instruction(s) that
 * compute `dst` from the previous value `prev` and the current value `cur`,
 * using the exec mask to select between them. Cheaper sequences are chosen
 * when `prev` and/or `cur` are constants.
 *
 * program - program being modified
 * block   - block that receives the instructions; must contain p_logical_end
 * dst     - definition written by the emitted code
 * prev    - value coming from before this block (may be undefined)
 * cur     - value produced for this block
 *
 * NOTE(review): several `else`/`return` lines are not visible in this extract
 * (the embedded numbering skips 132-134, 146, 148, 154, 156, 161); the branch
 * comments below describe the visible statements only.
 */
119 void build_merge_code(Program
*program
, Block
*block
, Definition dst
, Operand prev
, Operand cur
)
121 Builder
bld(program
);
123 auto IsLogicalEnd
= [] (const aco_ptr
<Instruction
>& instr
) -> bool {
124 return instr
->opcode
== aco_opcode::p_logical_end
;
/* Position the builder at the last p_logical_end of the block; the block is
 * required to have one. */
126 auto it
= std::find_if(block
->instructions
.rbegin(), block
->instructions
.rend(), IsLogicalEnd
);
127 assert(it
!= block
->instructions
.rend());
128 bld
.reset(&block
->instructions
, std::prev(it
.base()));
/* Nothing to merge with: dst is a plain copy of cur. */
130 if (prev
.isUndefined()) {
131 bld
.sop1(Builder::s_mov
, dst
, cur
);
/* A value counts as "constant" here only if `value + 1u < 2u`, i.e. (with
 * unsigned arithmetic) the value is 0 or all ones. */
135 bool prev_is_constant
= prev
.isConstant() && prev
.constantValue64(true) + 1u < 2u;
136 bool cur_is_constant
= cur
.isConstant() && cur
.constantValue64(true) + 1u < 2u;
138 if (!prev_is_constant
) {
139 if (!cur_is_constant
) {
/* Neither side is constant: s_andn2(prev, exec), s_and(cur, exec), then
 * s_or of the two temporaries into dst. */
140 Temp tmp1
= bld
.tmp(bld
.lm
), tmp2
= bld
.tmp(bld
.lm
);
141 bld
.sop2(Builder::s_andn2
, Definition(tmp1
), bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
142 bld
.sop2(Builder::s_and
, Definition(tmp2
), bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
143 bld
.sop2(Builder::s_or
, dst
, bld
.def(s1
, scc
), tmp1
, tmp2
);
/* cur is a non-zero constant: s_or(prev, exec). */
144 } else if (cur
.constantValue64(true)) {
145 bld
.sop2(Builder::s_or
, dst
, bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
/* Remaining case for non-constant prev (presumably cur == 0):
 * s_andn2(prev, exec). */
147 bld
.sop2(Builder::s_andn2
, dst
, bld
.def(s1
, scc
), prev
, Operand(exec
, bld
.lm
));
/* prev is a non-zero constant. */
149 } else if (prev
.constantValue64(true)) {
150 if (!cur_is_constant
)
151 bld
.sop2(Builder::s_orn2
, dst
, bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
/* Both non-zero constants: move an all-ones literal sized by the wave size. */
152 else if (cur
.constantValue64(true))
153 bld
.sop1(Builder::s_mov
, dst
, program
->wave_size
== 64 ? Operand(UINT64_MAX
) : Operand(UINT32_MAX
));
/* Remaining case (presumably cur == 0): s_not(exec). */
155 bld
.sop1(Builder::s_not
, dst
, bld
.def(s1
, scc
), Operand(exec
, bld
.lm
));
/* Remaining outer case (presumably prev == 0). */
157 if (!cur_is_constant
)
158 bld
.sop2(Builder::s_and
, dst
, bld
.def(s1
, scc
), cur
, Operand(exec
, bld
.lm
));
/* cur is a non-zero constant: dst is a copy of exec. */
159 else if (cur
.constantValue64(true))
160 bld
.sop1(Builder::s_mov
, dst
, Operand(exec
, bld
.lm
));
/* Remaining case: move a zero literal sized by the wave size. */
162 bld
.sop1(Builder::s_mov
, dst
, program
->wave_size
== 64 ? Operand((uint64_t)0u) : Operand(0u));
/* Rewrites a lane-mask p_phi into a p_linear_phi.
 *
 * When the block is not a merge block and all of its logical predecessors
 * are uniform, only the opcode is switched. Otherwise a temporary is reserved
 * for every predecessor with a defined operand, merge code is emitted into
 * that predecessor through build_merge_code(), and the phi operands are
 * finally re-derived from the linear predecessors with get_ssa().
 *
 * program - program being lowered
 * state   - bookkeeping reused across the phis of one block
 * block   - block that contains `phi`
 * phi     - instruction to rewrite
 *
 * NOTE(review): several control-flow lines (`return`, `continue`, closing
 * braces, the declaration of `cur`) are not visible in this extract; the
 * embedded numbering has matching gaps.
 */
166 void lower_divergent_bool_phi(Program
*program
, ssa_state
*state
, Block
*block
, aco_ptr
<Instruction
>& phi
)
168 Builder
bld(program
);
/* Compute the uniformity of the predecessors once and remember the result in
 * the state. */
170 if (!state
->checked_preds_for_uniform
) {
171 state
->all_preds_uniform
= !(block
->kind
& block_kind_merge
);
172 for (unsigned pred
: block
->logical_preds
)
173 state
->all_preds_uniform
= state
->all_preds_uniform
&& (program
->blocks
[pred
].kind
& block_kind_uniform
);
174 state
->checked_preds_for_uniform
= true;
/* Uniform predecessors: logical and linear predecessors must match in count,
 * and the phi only changes opcode.
 * NOTE(review): presumably followed by an early return (not visible). */
177 if (state
->all_preds_uniform
) {
178 assert(block
->logical_preds
.size() == block
->linear_preds
.size());
179 phi
->opcode
= aco_opcode::p_linear_phi
;
/* Make the per-block caches cover every block of the program. */
183 state
->latest
.resize(program
->blocks
.size());
184 state
->visited
.resize(program
->blocks
.size());
/* Bit i of the mask is meant to record that operand i is undefined.
 * NOTE(review): if isUndefined() yields bool/int, the shift happens at int
 * width before being OR-ed into the 64-bit mask, which would be wrong for
 * i >= 32 - confirm and consider casting to uint64_t first. */
186 uint64_t undef_operands
= 0;
187 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++)
188 undef_operands
|= phi
->operands
[i
].isUndefined() << i
;
/* Re-initialise the state on first use for this block, when the undefined
 * set differs from the recorded one, or when there are more than 64 logical
 * predecessors. */
190 if (state
->needs_init
|| undef_operands
!= state
->cur_undef_operands
||
191 block
->logical_preds
.size() > 64) {
192 /* this only has to be done once per block unless the set of predecessors
193 * which are undefined changes */
194 state
->cur_undef_operands
= undef_operands
;
195 state
->phi_block_idx
= block
->index
;
196 state
->loop_nest_depth
= block
->loop_nest_depth
;
/* For a loop exit the stored depth is one more than the block's own. */
197 if (block
->kind
& block_kind_loop_exit
) {
198 state
->loop_nest_depth
+= 1;
200 state
->writes
.clear();
201 state
->needs_init
= false;
/* Reset the get_ssa() caches for this phi. */
203 std::fill(state
->latest
.begin(), state
->latest
.end(), Operand(program
->lane_mask
));
204 std::fill(state
->visited
.begin(), state
->visited
.end(), false);
/* Reserve a fresh id for each logical predecessor.
 * NOTE(review): the undefined check below presumably skips the operand via a
 * `continue` that is not visible here. */
206 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
207 if (phi
->operands
[i
].isUndefined())
210 state
->writes
[block
->logical_preds
[i
]] = program
->allocateId();
/* Starts out true for loop headers and is cleared in the loop below once a
 * non-uniform predecessor has been seen. */
213 bool uniform_merge
= block
->kind
& block_kind_loop_header
;
215 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
216 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
/* Sampled before this predecessor can clear uniform_merge. */
218 bool need_get_ssa
= !uniform_merge
;
219 if (block
->kind
& block_kind_loop_header
&& !(pred
->kind
& block_kind_uniform
))
220 uniform_merge
= false;
/* NOTE(review): presumably `continue`s for undefined operands; the lines
 * that declare `cur` and test need_get_ssa are also not visible. */
222 if (phi
->operands
[i
].isUndefined())
/* Previous value reaching the predecessor. */
227 cur
= get_ssa(program
, pred
->index
, state
, true);
228 assert(cur
.regClass() == bld
.lm
);
/* Temporary reserved above for the value written in this predecessor. */
230 Temp new_cur
= {state
->writes
.at(pred
->index
), program
->lane_mask
};
231 assert(new_cur
.regClass() == bld
.lm
);
/* For the second operand of a merge block whose first operand is constant,
 * that constant is used as the previous value instead. */
233 if (i
== 1 && (block
->kind
& block_kind_merge
) && phi
->operands
[0].isConstant())
234 cur
= phi
->operands
[0];
235 build_merge_code(program
, pred
, Definition(new_cur
), cur
, phi
->operands
[i
]);
/* The linear phi needs one operand per linear predecessor; a new instruction
 * is created when the operand count differs, otherwise the opcode is changed.
 * NOTE(review): the hand-over of new_phi to `phi` and the `else` are not
 * visible here (numbering skips 242-243). */
238 unsigned num_preds
= block
->linear_preds
.size();
239 if (phi
->operands
.size() != num_preds
) {
240 Pseudo_instruction
* new_phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, num_preds
, 1)};
241 new_phi
->definitions
[0] = phi
->definitions
[0];
244 phi
->opcode
= aco_opcode::p_linear_phi
;
246 assert(phi
->operands
.size() == num_preds
);
/* Fill every operand with the value reaching the matching linear
 * predecessor. */
248 for (unsigned i
= 0; i
< num_preds
; i
++)
249 phi
->operands
[i
] = get_ssa(program
, block
->linear_preds
[i
], state
, false);
/* Rewrites phi operands whose register class differs from the phi's
 * definition: in the corresponding logical predecessor the SGPR source is
 * copied into a VGPR temporary of the same size (p_create_vector), element 0
 * is extracted into a temporary of the definition's class
 * (p_extract_vector), and the operand is pointed at that temporary.
 *
 * program - program being lowered
 * block   - block that contains `phi`
 * phi     - phi whose operands are adjusted in place
 *
 * NOTE(review): the two guards at the top of the loop presumably `continue`;
 * those statements are not visible in this extract (numbering skips 259 and
 * 261-262).
 */
254 void lower_subdword_phis(Program
*program
, Block
*block
, aco_ptr
<Instruction
>& phi
)
256 Builder
bld(program
);
257 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
/* Guard: undefined operand. */
258 if (phi
->operands
[i
].isUndefined())
/* Guard: operand already has the definition's register class. */
260 if (phi
->operands
[i
].regClass() == phi
->definitions
[0].regClass())
/* Only temporaries of SGPR type are expected from here on. */
263 assert(phi
->operands
[i
].isTemp());
264 Block
*pred
= &program
->blocks
[block
->logical_preds
[i
]];
265 Temp phi_src
= phi
->operands
[i
].getTemp();
267 assert(phi_src
.regClass().type() == RegType::sgpr
);
/* Copy into a VGPR temporary with the same size as the source. */
268 Temp tmp
= bld
.tmp(RegClass(RegType::vgpr
, phi_src
.size()));
269 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_create_vector
, Definition(tmp
), phi_src
).get_ptr());
/* Extract element 0 into a temporary with the phi definition's class. */
270 Temp new_phi_src
= bld
.tmp(phi
->definitions
[0].regClass());
271 insert_before_logical_end(pred
, bld
.pseudo(aco_opcode::p_extract_vector
, Definition(new_phi_src
), tmp
, Operand(0u)).get_ptr());
/* Point the phi operand at the converted value. */
273 phi
->operands
[i
].setTemp(new_phi_src
);
278 void lower_phis(Program
* program
)
282 for (Block
& block
: program
->blocks
) {
283 state
.checked_preds_for_uniform
= false;
284 state
.needs_init
= true;
285 for (aco_ptr
<Instruction
>& phi
: block
.instructions
) {
286 if (phi
->opcode
== aco_opcode::p_phi
) {
287 assert(program
->wave_size
== 64 ? phi
->definitions
[0].regClass() != s1
: phi
->definitions
[0].regClass() != s2
);
288 if (phi
->definitions
[0].regClass() == program
->lane_mask
)
289 lower_divergent_bool_phi(program
, &state
, &block
, phi
);
290 else if (phi
->definitions
[0].regClass().is_subdword())
291 lower_subdword_phis(program
, &block
, phi
);
292 } else if (!is_phi(phi
)) {