/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "aco_builder.h"
/* Execution-mask state an instruction (or a whole block) requires.
 * Values are single bits so they can be OR-accumulated into
 * block_info::block_needs / ever_again_needs.
 * NOTE(review): Unspecified and Exact were lost in the garbled extraction;
 * they are reconstructed from their uses below (e.g.
 * "needs_exact(instr) ? Exact : Unspecified") — confirm against upstream. */
enum WQMState : uint8_t {
   Unspecified = 0,
   Exact = 1 << 0,       /* instruction must run with the exact (non-helper) mask */
   WQM = 1 << 1,         /* with control flow applied */
   Preserve_WQM = 1 << 2,
   Exact_Branch = 1 << 3,
};
/* Flags describing what an entry on the per-block exec-mask stack represents.
 * A mask can carry several flags at once (e.g. global | exact). */
enum mask_type : uint8_t {
   mask_type_global = 1 << 0,
   mask_type_exact = 1 << 1,
   mask_type_wqm = 1 << 2,
   mask_type_loop = 1 << 3, /* active lanes of a loop */
   mask_type_initial = 1 << 4, /* initially active lanes */
};
50 /* state for WQM propagation */
51 std::set
<unsigned> worklist
;
52 std::vector
<uint16_t> defined_in
;
53 std::vector
<bool> needs_wqm
;
54 std::vector
<bool> branch_wqm
; /* true if the branch condition in this block should be in wqm */
57 wqm_ctx(Program
* program
) : program(program
),
58 defined_in(program
->peekAllocationId(), 0xFFFF),
59 needs_wqm(program
->peekAllocationId()),
60 branch_wqm(program
->blocks
.size()),
64 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++)
71 uint16_t num_exec_masks
;
73 bool has_divergent_break
;
74 bool has_divergent_continue
;
75 bool has_discard
; /* has a discard or demote */
76 loop_info(Block
* b
, uint16_t num
, uint8_t needs
, bool breaks
, bool cont
, bool discard
) :
77 loop_header(b
), num_exec_masks(num
), needs(needs
), has_divergent_break(breaks
),
78 has_divergent_continue(cont
), has_discard(discard
) {}
82 std::vector
<std::pair
<Temp
, uint8_t>> exec
;
83 std::vector
<WQMState
> instr_needs
;
85 uint8_t ever_again_needs
;
91 std::vector
<block_info
> info
;
92 std::vector
<loop_info
> loop
;
93 bool handle_wqm
= false;
94 exec_ctx(Program
*program
) : program(program
), info(program
->blocks
.size()) {}
97 bool pred_by_exec_mask(aco_ptr
<Instruction
>& instr
) {
98 if (instr
->format
== Format::SMEM
|| instr
->isSALU())
100 if (instr
->format
== Format::PSEUDO_BARRIER
)
103 if (instr
->format
== Format::PSEUDO
) {
104 switch (instr
->opcode
) {
105 case aco_opcode::p_create_vector
:
106 return instr
->definitions
[0].getTemp().type() == RegType::vgpr
;
107 case aco_opcode::p_extract_vector
:
108 case aco_opcode::p_split_vector
:
109 return instr
->operands
[0].getTemp().type() == RegType::vgpr
;
110 case aco_opcode::p_spill
:
111 case aco_opcode::p_reload
:
118 if (instr
->opcode
== aco_opcode::v_readlane_b32
||
119 instr
->opcode
== aco_opcode::v_writelane_b32
)
125 bool needs_exact(aco_ptr
<Instruction
>& instr
) {
126 if (instr
->format
== Format::MUBUF
) {
127 MUBUF_instruction
*mubuf
= static_cast<MUBUF_instruction
*>(instr
.get());
128 return mubuf
->disable_wqm
;
129 } else if (instr
->format
== Format::MTBUF
) {
130 MTBUF_instruction
*mtbuf
= static_cast<MTBUF_instruction
*>(instr
.get());
131 return mtbuf
->disable_wqm
;
132 } else if (instr
->format
== Format::MIMG
) {
133 MIMG_instruction
*mimg
= static_cast<MIMG_instruction
*>(instr
.get());
134 return mimg
->disable_wqm
;
136 return instr
->format
== Format::EXP
|| instr
->opcode
== aco_opcode::p_fs_buffer_store_smem
;
140 void set_needs_wqm(wqm_ctx
&ctx
, Temp tmp
)
142 if (!ctx
.needs_wqm
[tmp
.id()]) {
143 ctx
.needs_wqm
[tmp
.id()] = true;
144 if (ctx
.defined_in
[tmp
.id()] != 0xFFFF)
145 ctx
.worklist
.insert(ctx
.defined_in
[tmp
.id()]);
149 void mark_block_wqm(wqm_ctx
&ctx
, unsigned block_idx
)
151 if (ctx
.branch_wqm
[block_idx
])
154 ctx
.branch_wqm
[block_idx
] = true;
155 Block
& block
= ctx
.program
->blocks
[block_idx
];
156 aco_ptr
<Instruction
>& branch
= block
.instructions
.back();
158 if (branch
->opcode
!= aco_opcode::p_branch
) {
159 assert(!branch
->operands
.empty() && branch
->operands
[0].isTemp());
160 set_needs_wqm(ctx
, branch
->operands
[0].getTemp());
163 /* TODO: this sets more branch conditions to WQM than it needs to
164 * it should be enough to stop at the "exec mask top level" */
165 if (block
.kind
& block_kind_top_level
)
168 for (unsigned pred_idx
: block
.logical_preds
)
169 mark_block_wqm(ctx
, pred_idx
);
172 void get_block_needs(wqm_ctx
&ctx
, exec_ctx
&exec_ctx
, Block
* block
)
174 block_info
& info
= exec_ctx
.info
[block
->index
];
176 std::vector
<WQMState
> instr_needs(block
->instructions
.size());
178 if (block
->kind
& block_kind_top_level
) {
179 if (ctx
.loop
&& ctx
.wqm
) {
180 /* mark all break conditions as WQM */
181 unsigned block_idx
= block
->index
+ 1;
182 while (!(ctx
.program
->blocks
[block_idx
].kind
& block_kind_top_level
)) {
183 if (ctx
.program
->blocks
[block_idx
].kind
& block_kind_break
)
184 mark_block_wqm(ctx
, block_idx
);
187 } else if (ctx
.loop
&& !ctx
.wqm
) {
188 /* Ensure a branch never results in an exec mask with only helper
189 * invocations (which can cause a loop to repeat infinitively if it's
190 * break branches are done in exact). */
191 unsigned block_idx
= block
->index
;
193 if ((ctx
.program
->blocks
[block_idx
].kind
& block_kind_branch
))
194 exec_ctx
.info
[block_idx
].block_needs
|= Exact_Branch
;
196 } while (!(ctx
.program
->blocks
[block_idx
].kind
& block_kind_top_level
));
203 for (int i
= block
->instructions
.size() - 1; i
>= 0; --i
)
205 aco_ptr
<Instruction
>& instr
= block
->instructions
[i
];
207 WQMState needs
= needs_exact(instr
) ? Exact
: Unspecified
;
208 bool propagate_wqm
= instr
->opcode
== aco_opcode::p_wqm
;
209 bool preserve_wqm
= instr
->opcode
== aco_opcode::p_discard_if
;
210 bool pred_by_exec
= pred_by_exec_mask(instr
);
211 for (const Definition
& definition
: instr
->definitions
) {
212 if (!definition
.isTemp())
214 const unsigned def
= definition
.tempId();
215 ctx
.defined_in
[def
] = block
->index
;
216 if (needs
== Unspecified
&& ctx
.needs_wqm
[def
]) {
217 needs
= pred_by_exec
? WQM
: Unspecified
;
218 propagate_wqm
= true;
223 for (const Operand
& op
: instr
->operands
) {
225 set_needs_wqm(ctx
, op
.getTemp());
228 } else if (preserve_wqm
&& info
.block_needs
& WQM
) {
229 needs
= Preserve_WQM
;
232 /* ensure the condition controlling the control flow for this phi is in WQM */
233 if (needs
== WQM
&& instr
->opcode
== aco_opcode::p_phi
) {
234 for (unsigned pred_idx
: block
->logical_preds
)
235 mark_block_wqm(ctx
, pred_idx
);
238 instr_needs
[i
] = needs
;
239 info
.block_needs
|= needs
;
242 info
.instr_needs
= instr_needs
;
244 /* for "if (<cond>) <wqm code>" or "while (<cond>) <wqm code>",
245 * <cond> should be computed in WQM */
246 if (info
.block_needs
& WQM
&& !(block
->kind
& block_kind_top_level
)) {
247 for (unsigned pred_idx
: block
->logical_preds
)
248 mark_block_wqm(ctx
, pred_idx
);
251 if (block
->kind
& block_kind_loop_header
)
255 void calculate_wqm_needs(exec_ctx
& exec_ctx
)
257 wqm_ctx
ctx(exec_ctx
.program
);
259 while (!ctx
.worklist
.empty()) {
260 unsigned block_index
= *std::prev(ctx
.worklist
.end());
261 ctx
.worklist
.erase(std::prev(ctx
.worklist
.end()));
263 get_block_needs(ctx
, exec_ctx
, &exec_ctx
.program
->blocks
[block_index
]);
266 uint8_t ever_again_needs
= 0;
267 for (int i
= exec_ctx
.program
->blocks
.size() - 1; i
>= 0; i
--) {
268 exec_ctx
.info
[i
].ever_again_needs
= ever_again_needs
;
269 Block
& block
= exec_ctx
.program
->blocks
[i
];
271 if (block
.kind
& block_kind_needs_lowering
)
272 exec_ctx
.info
[i
].block_needs
|= Exact
;
274 /* if discard is used somewhere in nested CF, we need to preserve the WQM mask */
275 if ((block
.kind
& block_kind_discard
||
276 block
.kind
& block_kind_uses_discard_if
) &&
277 ever_again_needs
& WQM
)
278 exec_ctx
.info
[i
].block_needs
|= Preserve_WQM
;
280 ever_again_needs
|= exec_ctx
.info
[i
].block_needs
& ~Exact_Branch
;
281 if (block
.kind
& block_kind_discard
||
282 block
.kind
& block_kind_uses_discard_if
||
283 block
.kind
& block_kind_uses_demote
)
284 ever_again_needs
|= Exact
;
286 /* don't propagate WQM preservation further than the next top_level block */
287 if (block
.kind
& block_kind_top_level
)
288 ever_again_needs
&= ~Preserve_WQM
;
290 exec_ctx
.info
[i
].block_needs
&= ~Preserve_WQM
;
292 exec_ctx
.handle_wqm
= true;
295 void transition_to_WQM(exec_ctx
& ctx
, Builder bld
, unsigned idx
)
297 if (ctx
.info
[idx
].exec
.back().second
& mask_type_wqm
)
299 if (ctx
.info
[idx
].exec
.back().second
& mask_type_global
) {
300 Temp exec_mask
= ctx
.info
[idx
].exec
.back().first
;
301 exec_mask
= bld
.sop1(aco_opcode::s_wqm_b64
, bld
.def(s2
, exec
), bld
.def(s1
, scc
), exec_mask
);
302 ctx
.info
[idx
].exec
.emplace_back(exec_mask
, mask_type_global
| mask_type_wqm
);
305 /* otherwise, the WQM mask should be one below the current mask */
306 ctx
.info
[idx
].exec
.pop_back();
307 assert(ctx
.info
[idx
].exec
.back().second
& mask_type_wqm
);
308 ctx
.info
[idx
].exec
.back().first
= bld
.pseudo(aco_opcode::p_parallelcopy
, bld
.def(s2
, exec
),
309 ctx
.info
[idx
].exec
.back().first
);
312 void transition_to_Exact(exec_ctx
& ctx
, Builder bld
, unsigned idx
)
314 if (ctx
.info
[idx
].exec
.back().second
& mask_type_exact
)
316 /* We can't remove the loop exec mask, because that can cause exec.size() to
317 * be less than num_exec_masks. The loop exec mask also needs to be kept
318 * around for various uses. */
319 if ((ctx
.info
[idx
].exec
.back().second
& mask_type_global
) &&
320 !(ctx
.info
[idx
].exec
.back().second
& mask_type_loop
)) {
321 ctx
.info
[idx
].exec
.pop_back();
322 assert(ctx
.info
[idx
].exec
.back().second
& mask_type_exact
);
323 ctx
.info
[idx
].exec
.back().first
= bld
.pseudo(aco_opcode::p_parallelcopy
, bld
.def(s2
, exec
),
324 ctx
.info
[idx
].exec
.back().first
);
327 /* otherwise, we create an exact mask and push to the stack */
328 Temp wqm
= ctx
.info
[idx
].exec
.back().first
;
329 Temp exact
= bld
.tmp(s2
);
330 wqm
= bld
.sop1(aco_opcode::s_and_saveexec_b64
, bld
.def(s2
), bld
.def(s1
, scc
),
331 bld
.exec(Definition(exact
)), ctx
.info
[idx
].exec
[0].first
, bld
.exec(wqm
));
332 ctx
.info
[idx
].exec
.back().first
= wqm
;
333 ctx
.info
[idx
].exec
.emplace_back(exact
, mask_type_exact
);
336 unsigned add_coupling_code(exec_ctx
& ctx
, Block
* block
,
337 std::vector
<aco_ptr
<Instruction
>>& instructions
)
339 unsigned idx
= block
->index
;
340 Builder
bld(ctx
.program
, &instructions
);
341 std::vector
<unsigned>& preds
= block
->linear_preds
;
345 aco_ptr
<Instruction
>& startpgm
= block
->instructions
[0];
346 assert(startpgm
->opcode
== aco_opcode::p_startpgm
);
347 Temp exec_mask
= startpgm
->definitions
.back().getTemp();
348 bld
.insert(std::move(startpgm
));
350 if (ctx
.handle_wqm
) {
351 ctx
.info
[0].exec
.emplace_back(exec_mask
, mask_type_global
| mask_type_exact
| mask_type_initial
);
352 /* if this block only needs WQM, initialize already */
353 if (ctx
.info
[0].block_needs
== WQM
)
354 transition_to_WQM(ctx
, bld
, 0);
356 uint8_t mask
= mask_type_global
;
357 if (ctx
.program
->needs_wqm
) {
358 exec_mask
= bld
.sop1(aco_opcode::s_wqm_b64
, bld
.def(s2
, exec
), bld
.def(s1
, scc
), bld
.exec(exec_mask
));
359 mask
|= mask_type_wqm
;
361 mask
|= mask_type_exact
;
363 ctx
.info
[0].exec
.emplace_back(exec_mask
, mask
);
369 /* loop entry block */
370 if (block
->kind
& block_kind_loop_header
) {
371 assert(preds
[0] == idx
- 1);
372 ctx
.info
[idx
].exec
= ctx
.info
[idx
- 1].exec
;
373 loop_info
& info
= ctx
.loop
.back();
374 while (ctx
.info
[idx
].exec
.size() > info
.num_exec_masks
)
375 ctx
.info
[idx
].exec
.pop_back();
377 /* create ssa names for outer exec masks */
378 if (info
.has_discard
) {
379 aco_ptr
<Pseudo_instruction
> phi
;
380 for (int i
= 0; i
< info
.num_exec_masks
- 1; i
++) {
381 phi
.reset(create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, preds
.size(), 1));
382 phi
->definitions
[0] = bld
.def(s2
);
383 phi
->operands
[0] = Operand(ctx
.info
[preds
[0]].exec
[i
].first
);
384 ctx
.info
[idx
].exec
[i
].first
= bld
.insert(std::move(phi
));
388 /* create ssa name for restore mask */
389 if (info
.has_divergent_break
) {
390 /* this phi might be trivial but ensures a parallelcopy on the loop header */
391 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, preds
.size(), 1)};
392 phi
->definitions
[0] = bld
.def(s2
);
393 phi
->operands
[0] = Operand(ctx
.info
[preds
[0]].exec
[info
.num_exec_masks
- 1].first
);
394 ctx
.info
[idx
].exec
.back().first
= bld
.insert(std::move(phi
));
397 /* create ssa name for loop active mask */
398 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, preds
.size(), 1)};
399 if (info
.has_divergent_continue
)
400 phi
->definitions
[0] = bld
.def(s2
);
402 phi
->definitions
[0] = bld
.def(s2
, exec
);
403 phi
->operands
[0] = Operand(ctx
.info
[preds
[0]].exec
.back().first
);
404 Temp loop_active
= bld
.insert(std::move(phi
));
406 if (info
.has_divergent_break
) {
407 uint8_t mask_type
= (ctx
.info
[idx
].exec
.back().second
& (mask_type_wqm
| mask_type_exact
)) | mask_type_loop
;
408 ctx
.info
[idx
].exec
.emplace_back(loop_active
, mask_type
);
410 ctx
.info
[idx
].exec
.back().first
= loop_active
;
411 ctx
.info
[idx
].exec
.back().second
|= mask_type_loop
;
414 /* create a parallelcopy to move the active mask to exec */
416 if (info
.has_divergent_continue
) {
417 while (block
->instructions
[i
]->opcode
!= aco_opcode::p_logical_start
) {
418 bld
.insert(std::move(block
->instructions
[i
]));
421 uint8_t mask_type
= ctx
.info
[idx
].exec
.back().second
& (mask_type_wqm
| mask_type_exact
);
422 ctx
.info
[idx
].exec
.emplace_back(bld
.pseudo(aco_opcode::p_parallelcopy
, bld
.def(s2
, exec
),
423 ctx
.info
[idx
].exec
.back().first
), mask_type
);
429 /* loop exit block */
430 if (block
->kind
& block_kind_loop_exit
) {
431 Block
* header
= ctx
.loop
.back().loop_header
;
432 loop_info
& info
= ctx
.loop
.back();
434 for (ASSERTED
unsigned pred
: preds
)
435 assert(ctx
.info
[pred
].exec
.size() >= info
.num_exec_masks
);
437 /* fill the loop header phis */
438 std::vector
<unsigned>& header_preds
= header
->linear_preds
;
440 if (info
.has_discard
) {
441 while (k
< info
.num_exec_masks
- 1) {
442 aco_ptr
<Instruction
>& phi
= header
->instructions
[k
];
443 assert(phi
->opcode
== aco_opcode::p_linear_phi
);
444 for (unsigned i
= 1; i
< phi
->operands
.size(); i
++)
445 phi
->operands
[i
] = Operand(ctx
.info
[header_preds
[i
]].exec
[k
].first
);
449 aco_ptr
<Instruction
>& phi
= header
->instructions
[k
++];
450 assert(phi
->opcode
== aco_opcode::p_linear_phi
);
451 for (unsigned i
= 1; i
< phi
->operands
.size(); i
++)
452 phi
->operands
[i
] = Operand(ctx
.info
[header_preds
[i
]].exec
[info
.num_exec_masks
- 1].first
);
454 if (info
.has_divergent_break
) {
455 aco_ptr
<Instruction
>& phi
= header
->instructions
[k
];
456 assert(phi
->opcode
== aco_opcode::p_linear_phi
);
457 for (unsigned i
= 1; i
< phi
->operands
.size(); i
++)
458 phi
->operands
[i
] = Operand(ctx
.info
[header_preds
[i
]].exec
[info
.num_exec_masks
].first
);
461 assert(!(block
->kind
& block_kind_top_level
) || info
.num_exec_masks
<= 2);
463 /* create the loop exit phis if not trivial */
464 for (unsigned k
= 0; k
< info
.num_exec_masks
; k
++) {
465 Temp same
= ctx
.info
[preds
[0]].exec
[k
].first
;
466 uint8_t type
= ctx
.info
[header_preds
[0]].exec
[k
].second
;
469 for (unsigned i
= 1; i
< preds
.size() && trivial
; i
++) {
470 if (ctx
.info
[preds
[i
]].exec
[k
].first
!= same
)
475 ctx
.info
[idx
].exec
.emplace_back(same
, type
);
477 /* create phi for loop footer */
478 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_linear_phi
, Format::PSEUDO
, preds
.size(), 1)};
479 phi
->definitions
[0] = bld
.def(s2
);
480 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++)
481 phi
->operands
[i
] = Operand(ctx
.info
[preds
[i
]].exec
[k
].first
);
482 ctx
.info
[idx
].exec
.emplace_back(bld
.insert(std::move(phi
)), type
);
485 assert(ctx
.info
[idx
].exec
.size() == info
.num_exec_masks
);
487 /* create a parallelcopy to move the live mask to exec */
489 while (block
->instructions
[i
]->opcode
!= aco_opcode::p_logical_start
) {
490 bld
.insert(std::move(block
->instructions
[i
]));
494 if (ctx
.handle_wqm
) {
495 if (block
->kind
& block_kind_top_level
&& ctx
.info
[idx
].exec
.size() == 2) {
496 if ((ctx
.info
[idx
].block_needs
| ctx
.info
[idx
].ever_again_needs
) == 0 ||
497 (ctx
.info
[idx
].block_needs
| ctx
.info
[idx
].ever_again_needs
) == Exact
) {
498 ctx
.info
[idx
].exec
.back().second
|= mask_type_global
;
499 transition_to_Exact(ctx
, bld
, idx
);
500 ctx
.handle_wqm
= false;
503 if (ctx
.info
[idx
].block_needs
== WQM
)
504 transition_to_WQM(ctx
, bld
, idx
);
505 else if (ctx
.info
[idx
].block_needs
== Exact
)
506 transition_to_Exact(ctx
, bld
, idx
);
509 ctx
.info
[idx
].exec
.back().first
= bld
.pseudo(aco_opcode::p_parallelcopy
, bld
.def(s2
, exec
),
510 ctx
.info
[idx
].exec
.back().first
);
516 if (preds
.size() == 1) {
517 ctx
.info
[idx
].exec
= ctx
.info
[preds
[0]].exec
;
519 assert(preds
.size() == 2);
520 /* if one of the predecessors ends in exact mask, we pop it from stack */
521 unsigned num_exec_masks
= std::min(ctx
.info
[preds
[0]].exec
.size(),
522 ctx
.info
[preds
[1]].exec
.size());
523 if (block
->kind
& block_kind_top_level
&& !(block
->kind
& block_kind_merge
))
524 num_exec_masks
= std::min(num_exec_masks
, 2u);
526 /* create phis for diverged exec masks */
527 for (unsigned i
= 0; i
< num_exec_masks
; i
++) {
528 bool in_exec
= i
== num_exec_masks
- 1 && !(block
->kind
& block_kind_merge
);
529 if (!in_exec
&& ctx
.info
[preds
[0]].exec
[i
].first
== ctx
.info
[preds
[1]].exec
[i
].first
) {
530 assert(ctx
.info
[preds
[0]].exec
[i
].second
== ctx
.info
[preds
[1]].exec
[i
].second
);
531 ctx
.info
[idx
].exec
.emplace_back(ctx
.info
[preds
[0]].exec
[i
]);
535 Temp phi
= bld
.pseudo(aco_opcode::p_linear_phi
, in_exec
? bld
.def(s2
, exec
) : bld
.def(s2
),
536 ctx
.info
[preds
[0]].exec
[i
].first
,
537 ctx
.info
[preds
[1]].exec
[i
].first
);
538 uint8_t mask_type
= ctx
.info
[preds
[0]].exec
[i
].second
& ctx
.info
[preds
[1]].exec
[i
].second
;
539 ctx
.info
[idx
].exec
.emplace_back(phi
, mask_type
);
544 while (block
->instructions
[i
]->opcode
== aco_opcode::p_phi
||
545 block
->instructions
[i
]->opcode
== aco_opcode::p_linear_phi
) {
546 bld
.insert(std::move(block
->instructions
[i
]));
550 if (block
->kind
& block_kind_merge
)
551 ctx
.info
[idx
].exec
.pop_back();
553 if (block
->kind
& block_kind_top_level
&& ctx
.info
[idx
].exec
.size() == 3) {
554 assert(ctx
.info
[idx
].exec
.back().second
== mask_type_exact
);
555 assert(block
->kind
& block_kind_merge
);
556 ctx
.info
[idx
].exec
.pop_back();
559 /* try to satisfy the block's needs */
560 if (ctx
.handle_wqm
) {
561 if (block
->kind
& block_kind_top_level
&& ctx
.info
[idx
].exec
.size() == 2) {
562 if ((ctx
.info
[idx
].block_needs
| ctx
.info
[idx
].ever_again_needs
) == 0 ||
563 (ctx
.info
[idx
].block_needs
| ctx
.info
[idx
].ever_again_needs
) == Exact
) {
564 ctx
.info
[idx
].exec
.back().second
|= mask_type_global
;
565 transition_to_Exact(ctx
, bld
, idx
);
566 ctx
.handle_wqm
= false;
569 if (ctx
.info
[idx
].block_needs
== WQM
)
570 transition_to_WQM(ctx
, bld
, idx
);
571 else if (ctx
.info
[idx
].block_needs
== Exact
)
572 transition_to_Exact(ctx
, bld
, idx
);
575 if (block
->kind
& block_kind_merge
) {
576 Temp restore
= ctx
.info
[idx
].exec
.back().first
;
577 ctx
.info
[idx
].exec
.back().first
= bld
.pseudo(aco_opcode::p_parallelcopy
, bld
.def(s2
, exec
), restore
);
583 void lower_fs_buffer_store_smem(Builder
& bld
, bool need_check
, aco_ptr
<Instruction
>& instr
, Temp cur_exec
)
585 Operand offset
= instr
->operands
[1];
587 /* if exec is zero, then use UINT32_MAX as an offset and make this store a no-op */
588 Temp nonempty
= bld
.sopc(aco_opcode::s_cmp_lg_u64
, bld
.def(s1
, scc
), cur_exec
, Operand(0u));
590 if (offset
.isLiteral())
591 offset
= bld
.sop1(aco_opcode::s_mov_b32
, bld
.def(s1
), offset
);
593 offset
= bld
.sop2(aco_opcode::s_cselect_b32
, bld
.hint_m0(bld
.def(s1
)),
594 offset
, Operand(UINT32_MAX
), bld
.scc(nonempty
));
595 } else if (offset
.isConstant() && offset
.constantValue() > 0xFFFFF) {
596 offset
= bld
.sop1(aco_opcode::s_mov_b32
, bld
.hint_m0(bld
.def(s1
)), offset
);
598 if (!offset
.isConstant())
601 switch (instr
->operands
[2].size()) {
603 instr
->opcode
= aco_opcode::s_buffer_store_dword
;
606 instr
->opcode
= aco_opcode::s_buffer_store_dwordx2
;
609 instr
->opcode
= aco_opcode::s_buffer_store_dwordx4
;
612 unreachable("Invalid SMEM buffer store size");
614 instr
->operands
[1] = offset
;
615 /* as_uniform() needs to be done here so it's done in exact mode and helper
616 * lanes don't contribute. */
617 instr
->operands
[2] = Operand(bld
.as_uniform(instr
->operands
[2]));
620 void process_instructions(exec_ctx
& ctx
, Block
* block
,
621 std::vector
<aco_ptr
<Instruction
>>& instructions
,
625 if (ctx
.info
[block
->index
].exec
.back().second
& mask_type_wqm
)
628 assert(!ctx
.handle_wqm
|| ctx
.info
[block
->index
].exec
.back().second
& mask_type_exact
);
632 /* if the block doesn't need both, WQM and Exact, we can skip processing the instructions */
633 bool process
= (ctx
.handle_wqm
&&
634 (ctx
.info
[block
->index
].block_needs
& state
) !=
635 (ctx
.info
[block
->index
].block_needs
& (WQM
| Exact
))) ||
636 block
->kind
& block_kind_uses_discard_if
||
637 block
->kind
& block_kind_uses_demote
||
638 block
->kind
& block_kind_needs_lowering
;
640 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(block
->instructions
.begin(), idx
);
641 instructions
.insert(instructions
.end(),
642 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(it
),
643 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
647 Builder
bld(ctx
.program
, &instructions
);
649 for (; idx
< block
->instructions
.size(); idx
++) {
650 aco_ptr
<Instruction
> instr
= std::move(block
->instructions
[idx
]);
652 WQMState needs
= ctx
.handle_wqm
? ctx
.info
[block
->index
].instr_needs
[idx
] : Unspecified
;
654 if (instr
->opcode
== aco_opcode::p_discard_if
) {
655 if (ctx
.info
[block
->index
].block_needs
& Preserve_WQM
) {
656 assert(block
->kind
& block_kind_top_level
);
657 transition_to_WQM(ctx
, bld
, block
->index
);
658 ctx
.info
[block
->index
].exec
.back().second
&= ~mask_type_global
;
660 unsigned num
= ctx
.info
[block
->index
].exec
.size();
662 Operand cond
= instr
->operands
[0];
663 instr
.reset(create_instruction
<Pseudo_instruction
>(aco_opcode::p_discard_if
, Format::PSEUDO
, num
+ 1, num
+ 1));
664 for (unsigned i
= 0; i
< num
; i
++) {
665 instr
->operands
[i
] = Operand(ctx
.info
[block
->index
].exec
[i
].first
);
667 instr
->operands
[i
].setFixed(exec
);
668 Temp new_mask
= bld
.tmp(s2
);
669 instr
->definitions
[i
] = Definition(new_mask
);
670 ctx
.info
[block
->index
].exec
[i
].first
= new_mask
;
672 assert((ctx
.info
[block
->index
].exec
[0].second
& mask_type_wqm
) == 0);
673 instr
->definitions
[num
- 1].setFixed(exec
);
674 instr
->operands
[num
] = cond
;
675 instr
->definitions
[num
] = bld
.def(s1
, scc
);
677 } else if (needs
== WQM
&& state
!= WQM
) {
678 transition_to_WQM(ctx
, bld
, block
->index
);
680 } else if (needs
== Exact
&& state
!= Exact
) {
681 transition_to_Exact(ctx
, bld
, block
->index
);
685 if (instr
->opcode
== aco_opcode::p_is_helper
|| instr
->opcode
== aco_opcode::p_load_helper
) {
686 Definition dst
= instr
->definitions
[0];
687 if (state
== Exact
) {
688 instr
.reset(create_instruction
<SOP1_instruction
>(aco_opcode::s_mov_b64
, Format::SOP1
, 1, 1));
689 instr
->operands
[0] = Operand(0u);
690 instr
->definitions
[0] = dst
;
692 std::pair
<Temp
, uint8_t>& exact_mask
= ctx
.info
[block
->index
].exec
[0];
693 if (instr
->opcode
== aco_opcode::p_load_helper
&&
694 !(ctx
.info
[block
->index
].exec
[0].second
& mask_type_initial
)) {
695 /* find last initial exact mask */
696 for (int i
= block
->index
; i
>= 0; i
--) {
697 if (ctx
.program
->blocks
[i
].kind
& block_kind_top_level
&&
698 ctx
.info
[i
].exec
[0].second
& mask_type_initial
) {
699 exact_mask
= ctx
.info
[i
].exec
[0];
705 assert(instr
->opcode
== aco_opcode::p_is_helper
|| exact_mask
.second
& mask_type_initial
);
706 assert(exact_mask
.second
& mask_type_exact
);
708 instr
.reset(create_instruction
<SOP2_instruction
>(aco_opcode::s_andn2_b64
, Format::SOP2
, 2, 2));
709 instr
->operands
[0] = Operand(ctx
.info
[block
->index
].exec
.back().first
); /* current exec */
710 instr
->operands
[1] = Operand(exact_mask
.first
);
711 instr
->definitions
[0] = dst
;
712 instr
->definitions
[1] = bld
.def(s1
, scc
);
714 } else if (instr
->opcode
== aco_opcode::p_demote_to_helper
) {
715 /* turn demote into discard_if with only exact masks */
716 assert((ctx
.info
[block
->index
].exec
[0].second
& (mask_type_exact
| mask_type_global
)) == (mask_type_exact
| mask_type_global
));
717 ctx
.info
[block
->index
].exec
[0].second
&= ~mask_type_initial
;
721 if (instr
->operands
.empty()) {
722 /* transition to exact and set exec to zero */
723 Temp old_exec
= ctx
.info
[block
->index
].exec
.back().first
;
724 Temp new_exec
= bld
.tmp(s2
);
725 cond
= bld
.sop1(aco_opcode::s_and_saveexec_b64
, bld
.def(s2
), bld
.def(s1
, scc
),
726 bld
.exec(Definition(new_exec
)), Operand(0u), bld
.exec(old_exec
));
727 if (ctx
.info
[block
->index
].exec
.back().second
& mask_type_exact
) {
728 ctx
.info
[block
->index
].exec
.back().first
= new_exec
;
730 ctx
.info
[block
->index
].exec
.back().first
= cond
;
731 ctx
.info
[block
->index
].exec
.emplace_back(new_exec
, mask_type_exact
);
734 /* demote_if: transition to exact */
735 transition_to_Exact(ctx
, bld
, block
->index
);
736 assert(instr
->operands
[0].isTemp());
737 cond
= instr
->operands
[0].getTemp();
741 for (unsigned i
= 0; i
< ctx
.info
[block
->index
].exec
.size() - 1; i
++)
742 num
+= ctx
.info
[block
->index
].exec
[i
].second
& mask_type_exact
? 1 : 0;
743 instr
.reset(create_instruction
<Instruction
>(aco_opcode::p_discard_if
, Format::PSEUDO
, num
+ 1, num
+ 1));
745 for (unsigned i
= 0; k
< num
; i
++) {
746 if (ctx
.info
[block
->index
].exec
[i
].second
& mask_type_exact
) {
747 instr
->operands
[k
] = Operand(ctx
.info
[block
->index
].exec
[i
].first
);
748 Temp new_mask
= bld
.tmp(s2
);
749 instr
->definitions
[k
] = Definition(new_mask
);
750 if (i
== ctx
.info
[block
->index
].exec
.size() - 1)
751 instr
->definitions
[k
].setFixed(exec
);
753 ctx
.info
[block
->index
].exec
[i
].first
= new_mask
;
757 instr
->definitions
[num
] = bld
.def(s1
, scc
);
758 instr
->operands
[num
] = Operand(cond
);
761 } else if (instr
->opcode
== aco_opcode::p_fs_buffer_store_smem
) {
762 bool need_check
= ctx
.info
[block
->index
].exec
.size() != 1 &&
763 !(ctx
.info
[block
->index
].exec
[ctx
.info
[block
->index
].exec
.size() - 2].second
& Exact
);
764 lower_fs_buffer_store_smem(bld
, need_check
, instr
, ctx
.info
[block
->index
].exec
.back().first
);
767 bld
.insert(std::move(instr
));
771 void add_branch_code(exec_ctx
& ctx
, Block
* block
)
773 unsigned idx
= block
->index
;
774 Builder
bld(ctx
.program
, block
);
776 if (idx
== ctx
.program
->blocks
.size() - 1)
779 /* try to disable wqm handling */
780 if (ctx
.handle_wqm
&& block
->kind
& block_kind_top_level
) {
781 if (ctx
.info
[idx
].exec
.size() == 3) {
782 assert(ctx
.info
[idx
].exec
[1].second
== mask_type_wqm
);
783 ctx
.info
[idx
].exec
.pop_back();
785 assert(ctx
.info
[idx
].exec
.size() <= 2);
787 if (ctx
.info
[idx
].ever_again_needs
== 0 ||
788 ctx
.info
[idx
].ever_again_needs
== Exact
) {
789 /* transition to Exact */
790 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
791 block
->instructions
.pop_back();
792 ctx
.info
[idx
].exec
.back().second
|= mask_type_global
;
793 transition_to_Exact(ctx
, bld
, idx
);
794 bld
.insert(std::move(branch
));
795 ctx
.handle_wqm
= false;
797 } else if (ctx
.info
[idx
].block_needs
& Preserve_WQM
) {
798 /* transition to WQM and remove global flag */
799 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
800 block
->instructions
.pop_back();
801 transition_to_WQM(ctx
, bld
, idx
);
802 ctx
.info
[idx
].exec
.back().second
&= ~mask_type_global
;
803 bld
.insert(std::move(branch
));
807 if (block
->kind
& block_kind_loop_preheader
) {
808 /* collect information about the succeeding loop */
809 bool has_divergent_break
= false;
810 bool has_divergent_continue
= false;
811 bool has_discard
= false;
813 unsigned loop_nest_depth
= ctx
.program
->blocks
[idx
+ 1].loop_nest_depth
;
815 for (unsigned i
= idx
+ 1; ctx
.program
->blocks
[i
].loop_nest_depth
>= loop_nest_depth
; i
++) {
816 Block
& loop_block
= ctx
.program
->blocks
[i
];
817 needs
|= ctx
.info
[i
].block_needs
;
819 if (loop_block
.kind
& block_kind_uses_discard_if
||
820 loop_block
.kind
& block_kind_discard
||
821 loop_block
.kind
& block_kind_uses_demote
)
823 if (loop_block
.loop_nest_depth
!= loop_nest_depth
)
826 if (loop_block
.kind
& block_kind_uniform
)
828 else if (loop_block
.kind
& block_kind_break
)
829 has_divergent_break
= true;
830 else if (loop_block
.kind
& block_kind_continue
)
831 has_divergent_continue
= true;
834 if (ctx
.handle_wqm
) {
836 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
837 block
->instructions
.pop_back();
838 transition_to_WQM(ctx
, bld
, idx
);
839 bld
.insert(std::move(branch
));
841 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
842 block
->instructions
.pop_back();
843 transition_to_Exact(ctx
, bld
, idx
);
844 bld
.insert(std::move(branch
));
848 unsigned num_exec_masks
= ctx
.info
[idx
].exec
.size();
849 if (block
->kind
& block_kind_top_level
)
850 num_exec_masks
= std::min(num_exec_masks
, 2u);
852 ctx
.loop
.emplace_back(&ctx
.program
->blocks
[block
->linear_succs
[0]],
856 has_divergent_continue
,
860 if (block
->kind
& block_kind_discard
) {
862 assert(block
->instructions
.back()->format
== Format::PSEUDO_BRANCH
);
863 aco_ptr
<Instruction
> branch
= std::move(block
->instructions
.back());
864 block
->instructions
.pop_back();
866 /* create a discard_if() instruction with the exec mask as condition */
868 if (ctx
.loop
.size()) {
869 /* if we're in a loop, only discard from the outer exec masks */
870 num
= ctx
.loop
.back().num_exec_masks
;
872 num
= ctx
.info
[idx
].exec
.size() - 1;
875 Temp old_exec
= ctx
.info
[idx
].exec
.back().first
;
876 Temp new_exec
= bld
.tmp(s2
);
877 Temp cond
= bld
.sop1(aco_opcode::s_and_saveexec_b64
, bld
.def(s2
), bld
.def(s1
, scc
),
878 bld
.exec(Definition(new_exec
)), Operand(0u), bld
.exec(old_exec
));
879 ctx
.info
[idx
].exec
.back().first
= new_exec
;
881 aco_ptr
<Pseudo_instruction
> discard
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_discard_if
, Format::PSEUDO
, num
+ 1, num
+ 1)};
882 for (unsigned i
= 0; i
< num
; i
++) {
883 discard
->operands
[i
] = Operand(ctx
.info
[block
->index
].exec
[i
].first
);
884 Temp new_mask
= bld
.tmp(s2
);
885 discard
->definitions
[i
] = Definition(new_mask
);
886 ctx
.info
[block
->index
].exec
[i
].first
= new_mask
;
888 assert(!ctx
.handle_wqm
|| (ctx
.info
[block
->index
].exec
[0].second
& mask_type_wqm
) == 0);
889 discard
->operands
[num
] = Operand(cond
);
890 discard
->definitions
[num
] = bld
.def(s1
, scc
);
892 bld
.insert(std::move(discard
));
893 if ((block
->kind
& (block_kind_break
| block_kind_uniform
)) == block_kind_break
)
894 ctx
.info
[idx
].exec
.back().first
= cond
;
895 bld
.insert(std::move(branch
));
896 /* no return here as it can be followed by a divergent break */
899 if (block
->kind
& block_kind_continue_or_break
) {
900 assert(block
->instructions
.back()->opcode
== aco_opcode::p_branch
);
901 block
->instructions
.pop_back();
903 /* because of how linear_succs is created, this needs to be swapped */
904 std::swap(block
->linear_succs
[0], block
->linear_succs
[1]);
906 assert(ctx
.program
->blocks
[block
->linear_succs
[1]].kind
& block_kind_loop_header
);
907 assert(ctx
.program
->blocks
[ctx
.program
->blocks
[block
->linear_succs
[0]].linear_succs
[0]].kind
& block_kind_loop_exit
);
909 if (ctx
.info
[idx
].exec
.back().second
& mask_type_loop
) {
910 bld
.branch(aco_opcode::p_cbranch_nz
, bld
.exec(ctx
.info
[idx
].exec
.back().first
), block
->linear_succs
[1], block
->linear_succs
[0]);
913 for (int exec_idx
= ctx
.info
[idx
].exec
.size() - 1; exec_idx
>= 0; exec_idx
--) {
914 if (ctx
.info
[idx
].exec
[exec_idx
].second
& mask_type_loop
) {
915 cond
= bld
.sopc(aco_opcode::s_cmp_lg_u64
, bld
.def(s1
, scc
), ctx
.info
[idx
].exec
[exec_idx
].first
, Operand(0u));
919 assert(cond
!= Temp());
921 bld
.branch(aco_opcode::p_cbranch_nz
, bld
.scc(cond
), block
->linear_succs
[1], block
->linear_succs
[0]);
926 if (block
->kind
& block_kind_uniform
) {
927 Pseudo_branch_instruction
* branch
= static_cast<Pseudo_branch_instruction
*>(block
->instructions
.back().get());
928 if (branch
->opcode
== aco_opcode::p_branch
) {
929 branch
->target
[0] = block
->linear_succs
[0];
931 branch
->target
[0] = block
->linear_succs
[1];
932 branch
->target
[1] = block
->linear_succs
[0];
937 if (block
->kind
& block_kind_branch
) {
939 if (ctx
.handle_wqm
&&
940 ctx
.info
[idx
].exec
.size() >= 2 &&
941 ctx
.info
[idx
].exec
.back().second
== mask_type_exact
&&
942 !(ctx
.info
[idx
].block_needs
& Exact_Branch
) &&
943 ctx
.info
[idx
].exec
[ctx
.info
[idx
].exec
.size() - 2].second
& mask_type_wqm
) {
944 /* return to wqm before branching */
945 ctx
.info
[idx
].exec
.pop_back();
948 // orig = s_and_saveexec_b64
949 assert(block
->linear_succs
.size() == 2);
950 assert(block
->instructions
.back()->opcode
== aco_opcode::p_cbranch_z
);
951 Temp cond
= block
->instructions
.back()->operands
[0].getTemp();
952 block
->instructions
.pop_back();
954 if (ctx
.info
[idx
].block_needs
& Exact_Branch
)
955 transition_to_Exact(ctx
, bld
, idx
);
957 Temp current_exec
= ctx
.info
[idx
].exec
.back().first
;
958 uint8_t mask_type
= ctx
.info
[idx
].exec
.back().second
& (mask_type_wqm
| mask_type_exact
);
960 Temp then_mask
= bld
.tmp(s2
);
961 Temp old_exec
= bld
.sop1(aco_opcode::s_and_saveexec_b64
, bld
.def(s2
), bld
.def(s1
, scc
),
962 bld
.exec(Definition(then_mask
)), cond
, bld
.exec(current_exec
));
964 ctx
.info
[idx
].exec
.back().first
= old_exec
;
966 /* add next current exec to the stack */
967 ctx
.info
[idx
].exec
.emplace_back(then_mask
, mask_type
);
969 bld
.branch(aco_opcode::p_cbranch_z
, bld
.exec(then_mask
), block
->linear_succs
[1], block
->linear_succs
[0]);
973 if (block
->kind
& block_kind_invert
) {
974 // exec = s_andn2_b64 (original_exec, exec)
975 assert(block
->instructions
.back()->opcode
== aco_opcode::p_cbranch_nz
);
976 block
->instructions
.pop_back();
977 Temp then_mask
= ctx
.info
[idx
].exec
.back().first
;
978 uint8_t mask_type
= ctx
.info
[idx
].exec
.back().second
;
979 ctx
.info
[idx
].exec
.pop_back();
980 Temp orig_exec
= ctx
.info
[idx
].exec
.back().first
;
981 Temp else_mask
= bld
.sop2(aco_opcode::s_andn2_b64
, bld
.def(s2
, exec
),
982 bld
.def(s1
, scc
), orig_exec
, bld
.exec(then_mask
));
984 /* add next current exec to the stack */
985 ctx
.info
[idx
].exec
.emplace_back(else_mask
, mask_type
);
987 bld
.branch(aco_opcode::p_cbranch_z
, bld
.exec(else_mask
), block
->linear_succs
[1], block
->linear_succs
[0]);
991 if (block
->kind
& block_kind_break
) {
992 // loop_mask = s_andn2_b64 (loop_mask, exec)
993 assert(block
->instructions
.back()->opcode
== aco_opcode::p_branch
);
994 block
->instructions
.pop_back();
996 Temp current_exec
= ctx
.info
[idx
].exec
.back().first
;
998 for (int exec_idx
= ctx
.info
[idx
].exec
.size() - 2; exec_idx
>= 0; exec_idx
--) {
1000 Temp exec_mask
= ctx
.info
[idx
].exec
[exec_idx
].first
;
1001 exec_mask
= bld
.sop2(aco_opcode::s_andn2_b64
, bld
.def(s2
), bld
.scc(Definition(cond
)),
1002 exec_mask
, current_exec
);
1003 ctx
.info
[idx
].exec
[exec_idx
].first
= exec_mask
;
1004 if (ctx
.info
[idx
].exec
[exec_idx
].second
& mask_type_loop
)
1008 /* check if the successor is the merge block, otherwise set exec to 0 */
1009 // TODO: this could be done better by directly branching to the merge block
1010 unsigned succ_idx
= ctx
.program
->blocks
[block
->linear_succs
[1]].linear_succs
[0];
1011 Block
& succ
= ctx
.program
->blocks
[succ_idx
];
1012 if (!(succ
.kind
& block_kind_invert
|| succ
.kind
& block_kind_merge
)) {
1013 ctx
.info
[idx
].exec
.back().first
= bld
.sop1(aco_opcode::s_mov_b64
, bld
.def(s2
, exec
), Operand(0u));
1016 bld
.branch(aco_opcode::p_cbranch_nz
, bld
.scc(cond
), block
->linear_succs
[1], block
->linear_succs
[0]);
1020 if (block
->kind
& block_kind_continue
) {
1021 assert(block
->instructions
.back()->opcode
== aco_opcode::p_branch
);
1022 block
->instructions
.pop_back();
1024 Temp current_exec
= ctx
.info
[idx
].exec
.back().first
;
1026 for (int exec_idx
= ctx
.info
[idx
].exec
.size() - 2; exec_idx
>= 0; exec_idx
--) {
1027 if (ctx
.info
[idx
].exec
[exec_idx
].second
& mask_type_loop
)
1030 Temp exec_mask
= ctx
.info
[idx
].exec
[exec_idx
].first
;
1031 exec_mask
= bld
.sop2(aco_opcode::s_andn2_b64
, bld
.def(s2
), bld
.scc(Definition(cond
)),
1032 exec_mask
, bld
.exec(current_exec
));
1033 ctx
.info
[idx
].exec
[exec_idx
].first
= exec_mask
;
1035 assert(cond
!= Temp());
1037 /* check if the successor is the merge block, otherwise set exec to 0 */
1038 // TODO: this could be done better by directly branching to the merge block
1039 unsigned succ_idx
= ctx
.program
->blocks
[block
->linear_succs
[1]].linear_succs
[0];
1040 Block
& succ
= ctx
.program
->blocks
[succ_idx
];
1041 if (!(succ
.kind
& block_kind_invert
|| succ
.kind
& block_kind_merge
)) {
1042 ctx
.info
[idx
].exec
.back().first
= bld
.sop1(aco_opcode::s_mov_b64
, bld
.def(s2
, exec
), Operand(0u));
1045 bld
.branch(aco_opcode::p_cbranch_nz
, bld
.scc(cond
), block
->linear_succs
[1], block
->linear_succs
[0]);
1050 void process_block(exec_ctx
& ctx
, Block
* block
)
1052 std::vector
<aco_ptr
<Instruction
>> instructions
;
1053 instructions
.reserve(block
->instructions
.size());
1055 unsigned idx
= add_coupling_code(ctx
, block
, instructions
);
1057 assert(block
->index
!= ctx
.program
->blocks
.size() - 1 ||
1058 ctx
.info
[block
->index
].exec
.size() <= 2);
1060 process_instructions(ctx
, block
, instructions
, idx
);
1062 block
->instructions
= std::move(instructions
);
1064 add_branch_code(ctx
, block
);
1066 block
->live_out_exec
= ctx
.info
[block
->index
].exec
.back().first
;
1069 } /* end namespace */
1072 void insert_exec_mask(Program
*program
)
1074 exec_ctx
ctx(program
);
1076 if (program
->needs_wqm
&& program
->needs_exact
)
1077 calculate_wqm_needs(ctx
);
1079 for (Block
& block
: program
->blocks
)
1080 process_block(ctx
, &block
);