/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
33 enum chip_class chip_class
;
34 unsigned vcc_physical
;
37 /* just initialize these with something less than max NOPs */
38 int VALU_wrexec
= -10;
40 int VALU_wrsgpr
= -10;
43 bool has_VOPC
= false;
44 bool has_nonVALU_exec_read
= false;
45 bool has_VMEM
= false;
46 bool has_branch_after_VMEM
= false;
48 bool has_branch_after_DS
= false;
49 std::bitset
<128> sgprs_read_by_SMEM
;
50 std::bitset
<128> sgprs_read_by_VMEM
;
52 NOP_ctx(Program
* program
) : chip_class(program
->chip_class
) {
53 vcc_physical
= program
->config
->num_sgprs
- 2;
57 template <std::size_t N
>
58 bool check_written_regs(const aco_ptr
<Instruction
> &instr
, const std::bitset
<N
> &check_regs
)
60 return std::any_of(instr
->definitions
.begin(), instr
->definitions
.end(), [&check_regs
](const Definition
&def
) -> bool {
61 bool writes_any
= false;
62 for (unsigned i
= 0; i
< def
.size(); i
++) {
63 unsigned def_reg
= def
.physReg() + i
;
64 writes_any
|= def_reg
< check_regs
.size() && check_regs
[def_reg
];
70 template <std::size_t N
>
71 void mark_read_regs(const aco_ptr
<Instruction
> &instr
, std::bitset
<N
> ®_reads
)
73 for (const Operand
&op
: instr
->operands
) {
74 for (unsigned i
= 0; i
< op
.size(); i
++) {
75 unsigned reg
= op
.physReg() + i
;
76 if (reg
< reg_reads
.size())
82 bool VALU_writes_sgpr(aco_ptr
<Instruction
>& instr
)
84 if ((uint32_t) instr
->format
& (uint32_t) Format::VOPC
)
86 if (instr
->isVOP3() && instr
->definitions
.size() == 2)
88 if (instr
->opcode
== aco_opcode::v_readfirstlane_b32
|| instr
->opcode
== aco_opcode::v_readlane_b32
)
93 bool instr_reads_exec(const aco_ptr
<Instruction
>& instr
)
95 return std::any_of(instr
->operands
.begin(), instr
->operands
.end(), [](const Operand
&op
) -> bool {
96 return op
.physReg() == exec_lo
|| op
.physReg() == exec_hi
;
100 bool instr_writes_exec(const aco_ptr
<Instruction
>& instr
)
102 return std::any_of(instr
->definitions
.begin(), instr
->definitions
.end(), [](const Definition
&def
) -> bool {
103 return def
.physReg() == exec_lo
|| def
.physReg() == exec_hi
;
107 bool instr_writes_sgpr(const aco_ptr
<Instruction
>& instr
)
109 return std::any_of(instr
->definitions
.begin(), instr
->definitions
.end(), [](const Definition
&def
) -> bool {
110 return def
.getTemp().type() == RegType::sgpr
;
114 inline bool instr_is_branch(const aco_ptr
<Instruction
>& instr
)
116 return instr
->opcode
== aco_opcode::s_branch
||
117 instr
->opcode
== aco_opcode::s_cbranch_scc0
||
118 instr
->opcode
== aco_opcode::s_cbranch_scc1
||
119 instr
->opcode
== aco_opcode::s_cbranch_vccz
||
120 instr
->opcode
== aco_opcode::s_cbranch_vccnz
||
121 instr
->opcode
== aco_opcode::s_cbranch_execz
||
122 instr
->opcode
== aco_opcode::s_cbranch_execnz
||
123 instr
->opcode
== aco_opcode::s_cbranch_cdbgsys
||
124 instr
->opcode
== aco_opcode::s_cbranch_cdbguser
||
125 instr
->opcode
== aco_opcode::s_cbranch_cdbgsys_or_user
||
126 instr
->opcode
== aco_opcode::s_cbranch_cdbgsys_and_user
||
127 instr
->opcode
== aco_opcode::s_subvector_loop_begin
||
128 instr
->opcode
== aco_opcode::s_subvector_loop_end
||
129 instr
->opcode
== aco_opcode::s_setpc_b64
||
130 instr
->opcode
== aco_opcode::s_swappc_b64
||
131 instr
->opcode
== aco_opcode::s_getpc_b64
||
132 instr
->opcode
== aco_opcode::s_call_b64
;
135 bool regs_intersect(PhysReg a_reg
, unsigned a_size
, PhysReg b_reg
, unsigned b_size
)
137 return a_reg
> b_reg
?
138 (a_reg
- b_reg
< b_size
) :
139 (b_reg
- a_reg
< a_size
);
142 unsigned handle_SMEM_clause(aco_ptr
<Instruction
>& instr
, int new_idx
,
143 std::vector
<aco_ptr
<Instruction
>>& new_instructions
)
145 //TODO: s_dcache_inv needs to be in it's own group on GFX10 (and previous versions?)
146 const bool is_store
= instr
->definitions
.empty();
147 for (int pred_idx
= new_idx
- 1; pred_idx
>= 0; pred_idx
--) {
148 aco_ptr
<Instruction
>& pred
= new_instructions
[pred_idx
];
149 if (pred
->format
!= Format::SMEM
)
152 /* Don't allow clauses with store instructions since the clause's
153 * instructions may use the same address. */
154 if (is_store
|| pred
->definitions
.empty())
157 Definition
& instr_def
= instr
->definitions
[0];
158 Definition
& pred_def
= pred
->definitions
[0];
160 /* ISA reference doesn't say anything about this, but best to be safe */
161 if (regs_intersect(instr_def
.physReg(), instr_def
.size(), pred_def
.physReg(), pred_def
.size()))
164 for (const Operand
& op
: pred
->operands
) {
165 if (op
.isConstant() || !op
.isFixed())
167 if (regs_intersect(instr_def
.physReg(), instr_def
.size(), op
.physReg(), op
.size()))
170 for (const Operand
& op
: instr
->operands
) {
171 if (op
.isConstant() || !op
.isFixed())
173 if (regs_intersect(pred_def
.physReg(), pred_def
.size(), op
.physReg(), op
.size()))
181 int handle_instruction(NOP_ctx
& ctx
, aco_ptr
<Instruction
>& instr
,
182 std::vector
<aco_ptr
<Instruction
>>& old_instructions
,
183 std::vector
<aco_ptr
<Instruction
>>& new_instructions
)
185 int new_idx
= new_instructions
.size();
187 // TODO: setreg / getreg / m0 writes
188 // TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles
190 /* break off from prevous SMEM clause if needed */
191 if (instr
->format
== Format::SMEM
&& ctx
.chip_class
>= GFX8
) {
192 return handle_SMEM_clause(instr
, new_idx
, new_instructions
);
193 } else if (instr
->isVALU() || instr
->format
== Format::VINTRP
) {
196 if (instr
->isDPP()) {
197 /* VALU does not forward EXEC to DPP. */
198 if (ctx
.VALU_wrexec
+ 5 >= new_idx
)
199 NOPs
= 5 + ctx
.VALU_wrexec
- new_idx
+ 1;
201 /* VALU DPP reads VGPR written by VALU */
202 for (int pred_idx
= new_idx
- 1; pred_idx
>= 0 && pred_idx
>= new_idx
- 2; pred_idx
--) {
203 aco_ptr
<Instruction
>& pred
= new_instructions
[pred_idx
];
204 if ((pred
->isVALU() || pred
->format
== Format::VINTRP
) &&
205 !pred
->definitions
.empty() &&
206 pred
->definitions
[0].physReg() == instr
->operands
[0].physReg()) {
207 NOPs
= std::max(NOPs
, 2 + pred_idx
- new_idx
+ 1);
214 if (instr
->format
== Format::VINTRP
&& new_idx
> 0 && ctx
.chip_class
>= GFX9
) {
215 aco_ptr
<Instruction
>& pred
= new_instructions
.back();
216 if (pred
->isSALU() &&
217 !pred
->definitions
.empty() &&
218 pred
->definitions
[0].physReg() == m0
)
219 NOPs
= std::max(NOPs
, 1);
222 for (const Operand
& op
: instr
->operands
) {
223 /* VALU which uses VCCZ */
224 if (op
.physReg() == PhysReg
{251} &&
225 ctx
.VALU_wrvcc
+ 5 >= new_idx
)
226 NOPs
= std::max(NOPs
, 5 + ctx
.VALU_wrvcc
- new_idx
+ 1);
228 /* VALU which uses EXECZ */
229 if (op
.physReg() == PhysReg
{252} &&
230 ctx
.VALU_wrexec
+ 5 >= new_idx
)
231 NOPs
= std::max(NOPs
, 5 + ctx
.VALU_wrexec
- new_idx
+ 1);
233 /* VALU which reads VCC as a constant */
234 if (ctx
.VALU_wrvcc
+ 1 >= new_idx
) {
235 for (unsigned k
= 0; k
< op
.size(); k
++) {
236 unsigned reg
= op
.physReg() + k
;
237 if (reg
== ctx
.vcc_physical
|| reg
== ctx
.vcc_physical
+ 1)
238 NOPs
= std::max(NOPs
, 1);
243 switch (instr
->opcode
) {
244 case aco_opcode::v_readlane_b32
:
245 case aco_opcode::v_writelane_b32
: {
246 if (ctx
.VALU_wrsgpr
+ 4 < new_idx
)
248 PhysReg reg
= instr
->operands
[1].physReg();
249 for (int pred_idx
= new_idx
- 1; pred_idx
>= 0 && pred_idx
>= new_idx
- 4; pred_idx
--) {
250 aco_ptr
<Instruction
>& pred
= new_instructions
[pred_idx
];
251 if (!pred
->isVALU() || !VALU_writes_sgpr(pred
))
253 for (const Definition
& def
: pred
->definitions
) {
254 if (def
.physReg() == reg
)
255 NOPs
= std::max(NOPs
, 4 + pred_idx
- new_idx
+ 1);
260 case aco_opcode::v_div_fmas_f32
:
261 case aco_opcode::v_div_fmas_f64
: {
262 if (ctx
.VALU_wrvcc
+ 4 >= new_idx
)
263 NOPs
= std::max(NOPs
, 4 + ctx
.VALU_wrvcc
- new_idx
+ 1);
270 /* Write VGPRs holding writedata > 64 bit from MIMG/MUBUF instructions */
271 // FIXME: handle case if the last instruction of a block without branch is such store
272 // TODO: confirm that DS instructions cannot cause WAR hazards here
274 aco_ptr
<Instruction
>& pred
= new_instructions
.back();
275 if (pred
->isVMEM() &&
276 pred
->operands
.size() == 4 &&
277 pred
->operands
[3].size() > 2 &&
278 pred
->operands
[1].size() != 8 &&
279 (pred
->format
!= Format::MUBUF
|| pred
->operands
[2].physReg() >= 102)) {
280 /* Ops that use a 256-bit T# do not need a wait state.
281 * BUFFER_STORE_* operations that use an SGPR for "offset"
282 * do not require any wait states. */
283 PhysReg wrdata
= pred
->operands
[3].physReg();
284 unsigned size
= pred
->operands
[3].size();
285 assert(wrdata
>= 256);
286 for (const Definition
& def
: instr
->definitions
) {
287 if (regs_intersect(def
.physReg(), def
.size(), wrdata
, size
))
288 NOPs
= std::max(NOPs
, 1);
293 if (VALU_writes_sgpr(instr
)) {
294 for (const Definition
& def
: instr
->definitions
) {
295 if (def
.physReg() == vcc
)
296 ctx
.VALU_wrvcc
= NOPs
? new_idx
: new_idx
+ 1;
297 else if (def
.physReg() == exec
)
298 ctx
.VALU_wrexec
= NOPs
? new_idx
: new_idx
+ 1;
299 else if (def
.physReg() <= 102)
300 ctx
.VALU_wrsgpr
= NOPs
? new_idx
: new_idx
+ 1;
304 } else if (instr
->isVMEM() && ctx
.VALU_wrsgpr
+ 5 >= new_idx
) {
305 /* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
306 for (int pred_idx
= new_idx
- 1; pred_idx
>= 0 && pred_idx
>= new_idx
- 5; pred_idx
--) {
307 aco_ptr
<Instruction
>& pred
= new_instructions
[pred_idx
];
308 if (!(pred
->isVALU() && VALU_writes_sgpr(pred
)))
311 for (const Definition
& def
: pred
->definitions
) {
312 if (def
.physReg() > 102)
315 if (instr
->operands
.size() > 1 &&
316 regs_intersect(instr
->operands
[1].physReg(), instr
->operands
[1].size(),
317 def
.physReg(), def
.size())) {
318 return 5 + pred_idx
- new_idx
+ 1;
321 if (instr
->operands
.size() > 2 &&
322 regs_intersect(instr
->operands
[2].physReg(), instr
->operands
[2].size(),
323 def
.physReg(), def
.size())) {
324 return 5 + pred_idx
- new_idx
+ 1;
333 std::pair
<int, int> handle_instruction_gfx10(NOP_ctx
& ctx
, aco_ptr
<Instruction
>& instr
,
334 std::vector
<aco_ptr
<Instruction
>>& old_instructions
,
335 std::vector
<aco_ptr
<Instruction
>>& new_instructions
)
337 int new_idx
= new_instructions
.size();
341 /* break off from prevous SMEM group ("clause" seems to mean something different in RDNA) if needed */
342 if (instr
->format
== Format::SMEM
)
343 sNOPs
= std::max(sNOPs
, handle_SMEM_clause(instr
, new_idx
, new_instructions
));
345 /* VMEMtoScalarWriteHazard
346 * Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between.
348 if (instr
->isVMEM() || instr
->format
== Format::FLAT
|| instr
->format
== Format::GLOBAL
||
349 instr
->format
== Format::SCRATCH
|| instr
->format
== Format::DS
) {
350 /* Remember all SGPRs that are read by the VMEM instruction */
351 mark_read_regs(instr
, ctx
.sgprs_read_by_VMEM
);
352 } else if (instr
->isSALU() || instr
->format
== Format::SMEM
) {
353 /* Check if SALU writes an SGPR that was previously read by the VALU */
354 if (check_written_regs(instr
, ctx
.sgprs_read_by_VMEM
)) {
355 ctx
.sgprs_read_by_VMEM
.reset();
357 /* Insert v_nop to mitigate the problem */
358 aco_ptr
<VOP1_instruction
> nop
{create_instruction
<VOP1_instruction
>(aco_opcode::v_nop
, Format::VOP1
, 0, 0)};
359 new_instructions
.emplace_back(std::move(nop
));
361 } else if (instr
->opcode
== aco_opcode::s_waitcnt
) {
362 /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
363 uint16_t imm
= static_cast<SOPP_instruction
*>(instr
.get())->imm
;
364 unsigned vmcnt
= (imm
& 0xF) | ((imm
& (0x3 << 14)) >> 10);
366 ctx
.sgprs_read_by_VMEM
.reset();
367 } else if (instr
->isVALU()) {
368 /* Hazard is mitigated by any VALU instruction */
369 ctx
.sgprs_read_by_VMEM
.reset();
372 /* VcmpxPermlaneHazard
373 * Handle any permlane following a VOPC instruction, insert v_mov between them.
375 if (instr
->format
== Format::VOPC
) {
377 } else if (ctx
.has_VOPC
&&
378 (instr
->opcode
== aco_opcode::v_permlane16_b32
||
379 instr
->opcode
== aco_opcode::v_permlanex16_b32
)) {
380 ctx
.has_VOPC
= false;
382 /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
383 aco_ptr
<VOP1_instruction
> v_mov
{create_instruction
<VOP1_instruction
>(aco_opcode::v_mov_b32
, Format::VOP1
, 1, 1)};
384 v_mov
->definitions
[0] = Definition(instr
->operands
[0].physReg(), v1
);
385 v_mov
->operands
[0] = Operand(instr
->operands
[0].physReg(), v1
);
386 new_instructions
.emplace_back(std::move(v_mov
));
387 } else if (instr
->isVALU() && instr
->opcode
!= aco_opcode::v_nop
) {
388 ctx
.has_VOPC
= false;
391 /* VcmpxExecWARHazard
392 * Handle any VALU instruction writing the exec mask after it was read by a non-VALU instruction.
394 if (!instr
->isVALU() && instr_reads_exec(instr
)) {
395 ctx
.has_nonVALU_exec_read
= true;
396 } else if (instr
->isVALU()) {
397 if (instr_writes_exec(instr
)) {
398 ctx
.has_nonVALU_exec_read
= false;
400 /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
401 aco_ptr
<SOPP_instruction
> depctr
{create_instruction
<SOPP_instruction
>(aco_opcode::s_waitcnt_depctr
, Format::SOPP
, 0, 1)};
402 depctr
->imm
= 0xfffe;
403 depctr
->definitions
[0] = Definition(sgpr_null
, s1
);
404 new_instructions
.emplace_back(std::move(depctr
));
405 } else if (instr_writes_sgpr(instr
)) {
406 /* Any VALU instruction that writes an SGPR mitigates the problem */
407 ctx
.has_nonVALU_exec_read
= false;
409 } else if (instr
->opcode
== aco_opcode::s_waitcnt_depctr
) {
410 /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
411 const SOPP_instruction
*sopp
= static_cast<const SOPP_instruction
*>(instr
.get());
412 if ((sopp
->imm
& 0xfffe) == 0xfffe)
413 ctx
.has_nonVALU_exec_read
= false;
416 /* SMEMtoVectorWriteHazard
417 * Handle any VALU instruction writing an SGPR after an SMEM reads it.
419 if (instr
->format
== Format::SMEM
) {
420 /* Remember all SGPRs that are read by the SMEM instruction */
421 mark_read_regs(instr
, ctx
.sgprs_read_by_SMEM
);
422 } else if (VALU_writes_sgpr(instr
)) {
423 /* Check if VALU writes an SGPR that was previously read by SMEM */
424 if (check_written_regs(instr
, ctx
.sgprs_read_by_SMEM
)) {
425 ctx
.sgprs_read_by_SMEM
.reset();
427 /* Insert s_mov to mitigate the problem */
428 aco_ptr
<SOP1_instruction
> s_mov
{create_instruction
<SOP1_instruction
>(aco_opcode::s_mov_b32
, Format::SOP1
, 1, 1)};
429 s_mov
->definitions
[0] = Definition(sgpr_null
, s1
);
430 s_mov
->operands
[0] = Operand(0u);
431 new_instructions
.emplace_back(std::move(s_mov
));
433 } else if (instr
->isSALU()) {
434 if (instr
->format
!= Format::SOPP
) {
435 /* SALU can mitigate the hazard */
436 ctx
.sgprs_read_by_SMEM
.reset();
438 /* Reducing lgkmcnt count to 0 always mitigates the hazard. */
439 const SOPP_instruction
*sopp
= static_cast<const SOPP_instruction
*>(instr
.get());
440 if (sopp
->opcode
== aco_opcode::s_waitcnt_lgkmcnt
) {
441 if (sopp
->imm
== 0 && sopp
->definitions
[0].physReg() == sgpr_null
)
442 ctx
.sgprs_read_by_SMEM
.reset();
443 } else if (sopp
->opcode
== aco_opcode::s_waitcnt
) {
444 unsigned lgkm
= (sopp
->imm
>> 8) & 0x3f;
446 ctx
.sgprs_read_by_SMEM
.reset();
451 /* LdsBranchVmemWARHazard
452 * Handle VMEM/GLOBAL/SCRATCH->branch->DS and DS->branch->VMEM/GLOBAL/SCRATCH patterns.
454 if (instr
->isVMEM() || instr
->format
== Format::GLOBAL
|| instr
->format
== Format::SCRATCH
) {
456 ctx
.has_branch_after_VMEM
= false;
457 /* Mitigation for DS is needed only if there was already a branch after */
458 ctx
.has_DS
= ctx
.has_branch_after_DS
;
459 } else if (instr
->format
== Format::DS
) {
461 ctx
.has_branch_after_DS
= false;
462 /* Mitigation for VMEM is needed only if there was already a branch after */
463 ctx
.has_VMEM
= ctx
.has_branch_after_VMEM
;
464 } else if (instr_is_branch(instr
)) {
465 ctx
.has_branch_after_VMEM
= ctx
.has_VMEM
;
466 ctx
.has_branch_after_DS
= ctx
.has_DS
;
467 } else if (instr
->opcode
== aco_opcode::s_waitcnt_vscnt
) {
468 /* Only s_waitcnt_vscnt can mitigate the hazard */
469 const SOPK_instruction
*sopk
= static_cast<const SOPK_instruction
*>(instr
.get());
470 if (sopk
->definitions
[0].physReg() == sgpr_null
&& sopk
->imm
== 0)
471 ctx
.has_VMEM
= ctx
.has_branch_after_VMEM
= ctx
.has_DS
= ctx
.has_branch_after_DS
= false;
473 if ((ctx
.has_VMEM
&& ctx
.has_branch_after_DS
) || (ctx
.has_DS
&& ctx
.has_branch_after_VMEM
)) {
474 ctx
.has_VMEM
= ctx
.has_branch_after_VMEM
= ctx
.has_DS
= ctx
.has_branch_after_DS
= false;
476 /* Insert s_waitcnt_vscnt to mitigate the problem */
477 aco_ptr
<SOPK_instruction
> wait
{create_instruction
<SOPK_instruction
>(aco_opcode::s_waitcnt_vscnt
, Format::SOPK
, 0, 1)};
478 wait
->definitions
[0] = Definition(sgpr_null
, s1
);
480 new_instructions
.emplace_back(std::move(wait
));
483 return std::make_pair(sNOPs
, vNOPs
);
487 void handle_block(NOP_ctx
& ctx
, Block
& block
)
489 std::vector
<aco_ptr
<Instruction
>> instructions
;
490 instructions
.reserve(block
.instructions
.size());
491 for (unsigned i
= 0; i
< block
.instructions
.size(); i
++) {
492 aco_ptr
<Instruction
>& instr
= block
.instructions
[i
];
493 unsigned NOPs
= handle_instruction(ctx
, instr
, block
.instructions
, instructions
);
495 // TODO: try to move the instruction down
497 aco_ptr
<SOPP_instruction
> nop
{create_instruction
<SOPP_instruction
>(aco_opcode::s_nop
, Format::SOPP
, 0, 0)};
500 instructions
.emplace_back(std::move(nop
));
503 instructions
.emplace_back(std::move(instr
));
506 ctx
.VALU_wrvcc
-= instructions
.size();
507 ctx
.VALU_wrexec
-= instructions
.size();
508 ctx
.VALU_wrsgpr
-= instructions
.size();
509 block
.instructions
= std::move(instructions
);
512 void handle_block_gfx10(NOP_ctx
& ctx
, Block
& block
)
514 std::vector
<aco_ptr
<Instruction
>> instructions
;
515 instructions
.reserve(block
.instructions
.size());
516 for (unsigned i
= 0; i
< block
.instructions
.size(); i
++) {
517 aco_ptr
<Instruction
>& instr
= block
.instructions
[i
];
518 std::pair
<int, int> NOPs
= handle_instruction_gfx10(ctx
, instr
, block
.instructions
, instructions
);
519 for (int i
= 0; i
< NOPs
.second
; i
++) {
520 // TODO: try to move the instruction down
522 aco_ptr
<VOP1_instruction
> nop
{create_instruction
<VOP1_instruction
>(aco_opcode::v_nop
, Format::VOP1
, 0, 0)};
523 instructions
.emplace_back(std::move(nop
));
526 // TODO: try to move the instruction down
528 aco_ptr
<SOPP_instruction
> nop
{create_instruction
<SOPP_instruction
>(aco_opcode::s_nop
, Format::SOPP
, 0, 0)};
529 nop
->imm
= NOPs
.first
- 1;
531 instructions
.emplace_back(std::move(nop
));
534 instructions
.emplace_back(std::move(instr
));
537 block
.instructions
= std::move(instructions
);
} /* end namespace */
543 void insert_NOPs(Program
* program
)
545 NOP_ctx
ctx(program
);
547 for (Block
& block
: program
->blocks
) {
548 if (block
.instructions
.empty())
551 if (ctx
.chip_class
>= GFX10
)
552 handle_block_gfx10(ctx
, block
);
554 handle_block(ctx
, block
);