/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <algorithm>
#include <unordered_set>

#include "vulkan/radv_shader.h" // for radv_nir_compiler_options
#include "amdgfxregs.h"
/* Scheduling windows and move limits scale inversely with occupancy: when
 * fewer waves are resident, latency hiding depends more on the scheduler,
 * so larger windows and more moves are allowed. `ctx` is the sched_ctx in
 * scope at each use site. */
#define SMEM_WINDOW_SIZE (350 - ctx.num_waves * 35)
#define VMEM_WINDOW_SIZE (1024 - ctx.num_waves * 64)
#define POS_EXP_WINDOW_SIZE 512
#define SMEM_MAX_MOVES (80 - ctx.num_waves * 8)
#define VMEM_MAX_MOVES (128 - ctx.num_waves * 4)
#define POS_EXP_MAX_MOVES 512
42 std::vector
<bool> depends_on
;
43 std::vector
<bool> RAR_dependencies
;
44 RegisterDemand max_registers
;
46 int16_t last_SMEM_stall
;
47 int last_SMEM_dep_idx
;
/* This scheduler is a simple bottom-up pass based on ideas from
 * "A Novel Lightweight Instruction Scheduling Algorithm for Just-In-Time Compiler"
 * from Xiaohua Shi and Peng Guo.
 * The basic approach is to iterate over all instructions. When a memory instruction
 * is encountered it tries to move independent instructions from above and below
 * between the memory instruction and its first user.
 * The novelty is that this scheduler cares for the current register pressure:
 * Instructions will only be moved if the register pressure won't exceed a certain bound.
 */
/* Moves the element at index `idx` so that it ends up directly before the
 * element currently at index `before`, shifting everything in between by one
 * position. Works on any container with random-access-compatible iterators
 * (used here for both the instruction list and the register-demand vector).
 * A call with idx == before is a no-op. */
template <typename T>
void move_element(T& list, size_t idx, size_t before) {
   if (idx < before) {
      /* moving down: rotate [idx, before) left by one */
      auto begin = std::next(list.begin(), idx);
      auto end = std::next(list.begin(), before);
      std::rotate(begin, begin + 1, end);
   } else if (idx > before) {
      /* moving up: rotate [before, idx] right by one */
      auto begin = std::next(list.begin(), before);
      auto end = std::next(list.begin(), idx + 1);
      std::rotate(begin, end - 1, end);
   }
}
73 static RegisterDemand
getLiveChanges(aco_ptr
<Instruction
>& instr
)
75 RegisterDemand changes
;
76 for (const Definition
& def
: instr
->definitions
) {
77 if (!def
.isTemp() || def
.isKill())
79 changes
+= def
.getTemp();
82 for (const Operand
& op
: instr
->operands
) {
83 if (!op
.isTemp() || !op
.isFirstKill())
85 changes
-= op
.getTemp();
91 static RegisterDemand
getTempRegisters(aco_ptr
<Instruction
>& instr
)
93 RegisterDemand temp_registers
;
94 for (const Definition
& def
: instr
->definitions
) {
95 if (!def
.isTemp() || !def
.isKill())
97 temp_registers
+= def
.getTemp();
99 return temp_registers
;
102 static bool is_spill_reload(aco_ptr
<Instruction
>& instr
)
104 return instr
->opcode
== aco_opcode::p_spill
|| instr
->opcode
== aco_opcode::p_reload
;
107 bool can_move_instr(aco_ptr
<Instruction
>& instr
, Instruction
* current
, int moving_interaction
)
109 /* don't move exports so that they stay closer together */
110 if (instr
->format
== Format::EXP
)
113 /* handle barriers */
115 /* TODO: instead of stopping, maybe try to move the barriers and any
116 * instructions interacting with them instead? */
117 if (instr
->format
!= Format::PSEUDO_BARRIER
) {
118 if (instr
->opcode
== aco_opcode::s_barrier
) {
119 bool can_reorder
= false;
120 switch (current
->format
) {
122 can_reorder
= static_cast<SMEM_instruction
*>(current
)->can_reorder
;
125 can_reorder
= static_cast<MUBUF_instruction
*>(current
)->can_reorder
;
128 can_reorder
= static_cast<MIMG_instruction
*>(current
)->can_reorder
;
133 return can_reorder
&& moving_interaction
== barrier_none
;
139 int interaction
= get_barrier_interaction(current
);
140 interaction
|= moving_interaction
;
142 switch (instr
->opcode
) {
143 case aco_opcode::p_memory_barrier_atomic
:
144 return !(interaction
& barrier_atomic
);
145 /* For now, buffer and image barriers are treated the same. this is because of
146 * dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_nonlocal.buffer.guard_nonlocal.image.comp
147 * which seems to use an image load to determine if the result of a buffer load is valid. So the ordering of the two loads is important.
148 * I /think/ we should probably eventually expand the meaning of a buffer barrier so that all buffer operations before it, must stay before it
149 * and that both image and buffer operations after it, must stay after it. We should also do the same for image barriers.
150 * Or perhaps the problem is that we don't have a combined barrier instruction for both buffers and images, but the CTS test expects us to?
151 * Either way, this solution should work. */
152 case aco_opcode::p_memory_barrier_buffer
:
153 case aco_opcode::p_memory_barrier_image
:
154 return !(interaction
& (barrier_image
| barrier_buffer
));
155 case aco_opcode::p_memory_barrier_shared
:
156 return !(interaction
& barrier_shared
);
157 case aco_opcode::p_memory_barrier_all
:
158 return interaction
== barrier_none
;
164 bool can_reorder(Instruction
* candidate
, bool allow_smem
)
166 switch (candidate
->format
) {
168 return allow_smem
|| static_cast<SMEM_instruction
*>(candidate
)->can_reorder
;
170 return static_cast<MUBUF_instruction
*>(candidate
)->can_reorder
;
172 return static_cast<MIMG_instruction
*>(candidate
)->can_reorder
;
174 return static_cast<MTBUF_instruction
*>(candidate
)->can_reorder
;
177 case Format::SCRATCH
:
184 void schedule_SMEM(sched_ctx
& ctx
, Block
* block
,
185 std::vector
<RegisterDemand
>& register_demand
,
186 Instruction
* current
, int idx
)
189 int window_size
= SMEM_WINDOW_SIZE
;
190 int max_moves
= SMEM_MAX_MOVES
;
192 bool can_reorder_cur
= can_reorder(current
, false);
194 /* create the initial set of values which current depends on */
195 std::fill(ctx
.depends_on
.begin(), ctx
.depends_on
.end(), false);
196 for (const Operand
& op
: current
->operands
) {
198 ctx
.depends_on
[op
.tempId()] = true;
201 /* maintain how many registers remain free when moving instructions */
202 RegisterDemand register_pressure
= register_demand
[idx
];
204 /* first, check if we have instructions before current to move down */
205 int insert_idx
= idx
+ 1;
206 int moving_interaction
= barrier_none
;
207 bool moving_spill
= false;
209 for (int candidate_idx
= idx
- 1; k
< max_moves
&& candidate_idx
> (int) idx
- window_size
; candidate_idx
--) {
210 assert(candidate_idx
>= 0);
211 aco_ptr
<Instruction
>& candidate
= block
->instructions
[candidate_idx
];
213 /* break if we'd make the previous SMEM instruction stall */
214 bool can_stall_prev_smem
= idx
<= ctx
.last_SMEM_dep_idx
&& candidate_idx
< ctx
.last_SMEM_dep_idx
;
215 if (can_stall_prev_smem
&& ctx
.last_SMEM_stall
>= 0)
218 /* break when encountering another MEM instruction, logical_start or barriers */
219 if (!can_reorder(candidate
.get(), false) && !can_reorder_cur
)
221 if (candidate
->opcode
== aco_opcode::p_logical_start
)
223 if (!can_move_instr(candidate
, current
, moving_interaction
))
225 register_pressure
.update(register_demand
[candidate_idx
]);
227 /* if current depends on candidate, add additional dependencies and continue */
228 bool can_move_down
= true;
229 bool writes_exec
= false;
230 for (const Definition
& def
: candidate
->definitions
) {
231 if (def
.isTemp() && ctx
.depends_on
[def
.tempId()])
232 can_move_down
= false;
233 if (def
.isFixed() && def
.physReg() == exec
)
239 if (moving_spill
&& is_spill_reload(candidate
))
240 can_move_down
= false;
241 if ((moving_interaction
& barrier_shared
) && candidate
->format
== Format::DS
)
242 can_move_down
= false;
243 moving_interaction
|= get_barrier_interaction(candidate
.get());
244 moving_spill
|= is_spill_reload(candidate
);
245 if (!can_move_down
) {
246 for (const Operand
& op
: candidate
->operands
) {
248 ctx
.depends_on
[op
.tempId()] = true;
253 bool register_pressure_unknown
= false;
254 /* check if one of candidate's operands is killed by depending instruction */
255 for (const Operand
& op
: candidate
->operands
) {
256 if (op
.isTemp() && ctx
.depends_on
[op
.tempId()]) {
257 // FIXME: account for difference in register pressure
258 register_pressure_unknown
= true;
261 if (register_pressure_unknown
) {
262 for (const Operand
& op
: candidate
->operands
) {
264 ctx
.depends_on
[op
.tempId()] = true;
269 /* check if register pressure is low enough: the diff is negative if register pressure is decreased */
270 const RegisterDemand candidate_diff
= getLiveChanges(candidate
);
271 const RegisterDemand tempDemand
= getTempRegisters(candidate
);
272 if (RegisterDemand(register_pressure
- candidate_diff
).exceeds(ctx
.max_registers
))
274 const RegisterDemand tempDemand2
= getTempRegisters(block
->instructions
[insert_idx
- 1]);
275 const RegisterDemand new_demand
= register_demand
[insert_idx
- 1] - tempDemand2
+ tempDemand
;
276 if (new_demand
.exceeds(ctx
.max_registers
))
278 // TODO: we might want to look further to find a sequence of instructions to move down which doesn't exceed reg pressure
280 /* move the candidate below the memory load */
281 move_element(block
->instructions
, candidate_idx
, insert_idx
);
283 /* update register pressure */
284 move_element(register_demand
, candidate_idx
, insert_idx
);
285 for (int i
= candidate_idx
; i
< insert_idx
- 1; i
++) {
286 register_demand
[i
] -= candidate_diff
;
288 register_demand
[insert_idx
- 1] = new_demand
;
289 register_pressure
-= candidate_diff
;
291 if (candidate_idx
< ctx
.last_SMEM_dep_idx
)
292 ctx
.last_SMEM_stall
++;
297 /* create the initial set of values which depend on current */
298 std::fill(ctx
.depends_on
.begin(), ctx
.depends_on
.end(), false);
299 std::fill(ctx
.RAR_dependencies
.begin(), ctx
.RAR_dependencies
.end(), false);
300 for (const Definition
& def
: current
->definitions
) {
302 ctx
.depends_on
[def
.tempId()] = true;
305 /* find the first instruction depending on current or find another MEM */
306 insert_idx
= idx
+ 1;
307 moving_interaction
= barrier_none
;
308 moving_spill
= false;
310 bool found_dependency
= false;
311 /* second, check if we have instructions after current to move up */
312 for (int candidate_idx
= idx
+ 1; k
< max_moves
&& candidate_idx
< (int) idx
+ window_size
; candidate_idx
++) {
313 assert(candidate_idx
< (int) block
->instructions
.size());
314 aco_ptr
<Instruction
>& candidate
= block
->instructions
[candidate_idx
];
316 if (candidate
->opcode
== aco_opcode::p_logical_end
)
318 if (!can_move_instr(candidate
, current
, moving_interaction
))
321 const bool writes_exec
= std::any_of(candidate
->definitions
.begin(), candidate
->definitions
.end(),
322 [](const Definition
& def
) { return def
.isFixed() && def
.physReg() == exec
;});
326 /* check if candidate depends on current */
327 bool is_dependency
= std::any_of(candidate
->operands
.begin(), candidate
->operands
.end(),
328 [&ctx
](const Operand
& op
) { return op
.isTemp() && ctx
.depends_on
[op
.tempId()];});
329 if (moving_spill
&& is_spill_reload(candidate
))
330 is_dependency
= true;
331 if ((moving_interaction
& barrier_shared
) && candidate
->format
== Format::DS
)
332 is_dependency
= true;
333 moving_interaction
|= get_barrier_interaction(candidate
.get());
334 moving_spill
|= is_spill_reload(candidate
);
336 for (const Definition
& def
: candidate
->definitions
) {
338 ctx
.depends_on
[def
.tempId()] = true;
340 for (const Operand
& op
: candidate
->operands
) {
342 ctx
.RAR_dependencies
[op
.tempId()] = true;
344 if (!found_dependency
) {
345 insert_idx
= candidate_idx
;
346 found_dependency
= true;
347 /* init register pressure */
348 register_pressure
= register_demand
[insert_idx
- 1];
352 if (!can_reorder(candidate
.get(), false) && !can_reorder_cur
)
355 if (!found_dependency
) {
360 /* update register pressure */
361 register_pressure
.update(register_demand
[candidate_idx
- 1]);
365 assert(insert_idx
!= idx
);
367 // TODO: correctly calculate register pressure for this case
368 bool register_pressure_unknown
= false;
369 /* check if candidate uses/kills an operand which is used by a dependency */
370 for (const Operand
& op
: candidate
->operands
) {
371 if (op
.isTemp() && ctx
.RAR_dependencies
[op
.tempId()])
372 register_pressure_unknown
= true;
374 if (register_pressure_unknown
) {
375 for (const Definition
& def
: candidate
->definitions
) {
377 ctx
.RAR_dependencies
[def
.tempId()] = true;
379 for (const Operand
& op
: candidate
->operands
) {
381 ctx
.RAR_dependencies
[op
.tempId()] = true;
386 /* check if register pressure is low enough: the diff is negative if register pressure is decreased */
387 const RegisterDemand candidate_diff
= getLiveChanges(candidate
);
388 const RegisterDemand temp
= getTempRegisters(candidate
);
389 if (RegisterDemand(register_pressure
+ candidate_diff
).exceeds(ctx
.max_registers
))
391 const RegisterDemand temp2
= getTempRegisters(block
->instructions
[insert_idx
- 1]);
392 const RegisterDemand new_demand
= register_demand
[insert_idx
- 1] - temp2
+ candidate_diff
+ temp
;
393 if (new_demand
.exceeds(ctx
.max_registers
))
396 /* move the candidate above the insert_idx */
397 move_element(block
->instructions
, candidate_idx
, insert_idx
);
399 /* update register pressure */
400 move_element(register_demand
, candidate_idx
, insert_idx
);
401 for (int i
= insert_idx
+ 1; i
<= candidate_idx
; i
++) {
402 register_demand
[i
] += candidate_diff
;
404 register_demand
[insert_idx
] = new_demand
;
405 register_pressure
+= candidate_diff
;
410 ctx
.last_SMEM_dep_idx
= found_dependency
? insert_idx
: 0;
411 ctx
.last_SMEM_stall
= 10 - ctx
.num_waves
- k
;
414 void schedule_VMEM(sched_ctx
& ctx
, Block
* block
,
415 std::vector
<RegisterDemand
>& register_demand
,
416 Instruction
* current
, int idx
)
419 int window_size
= VMEM_WINDOW_SIZE
;
420 int max_moves
= VMEM_MAX_MOVES
;
422 bool can_reorder_cur
= can_reorder(current
, false);
424 /* create the initial set of values which current depends on */
425 std::fill(ctx
.depends_on
.begin(), ctx
.depends_on
.end(), false);
426 for (const Operand
& op
: current
->operands
) {
428 ctx
.depends_on
[op
.tempId()] = true;
431 /* maintain how many registers remain free when moving instructions */
432 RegisterDemand register_pressure
= register_demand
[idx
];
434 /* first, check if we have instructions before current to move down */
435 int insert_idx
= idx
+ 1;
436 int moving_interaction
= barrier_none
;
437 bool moving_spill
= false;
439 for (int candidate_idx
= idx
- 1; k
< max_moves
&& candidate_idx
> (int) idx
- window_size
; candidate_idx
--) {
440 assert(candidate_idx
>= 0);
441 aco_ptr
<Instruction
>& candidate
= block
->instructions
[candidate_idx
];
443 /* break when encountering another VMEM instruction, logical_start or barriers */
444 if (!can_reorder(candidate
.get(), true) && !can_reorder_cur
)
446 if (candidate
->opcode
== aco_opcode::p_logical_start
)
448 if (!can_move_instr(candidate
, current
, moving_interaction
))
451 /* break if we'd make the previous SMEM instruction stall */
452 bool can_stall_prev_smem
= idx
<= ctx
.last_SMEM_dep_idx
&& candidate_idx
< ctx
.last_SMEM_dep_idx
;
453 if (can_stall_prev_smem
&& ctx
.last_SMEM_stall
>= 0)
455 register_pressure
.update(register_demand
[candidate_idx
]);
457 /* if current depends on candidate, add additional dependencies and continue */
458 bool can_move_down
= true;
459 bool writes_exec
= false;
460 for (const Definition
& def
: candidate
->definitions
) {
461 if (def
.isTemp() && ctx
.depends_on
[def
.tempId()])
462 can_move_down
= false;
463 if (def
.isFixed() && def
.physReg() == exec
)
469 if (moving_spill
&& is_spill_reload(candidate
))
470 can_move_down
= false;
471 if ((moving_interaction
& barrier_shared
) && candidate
->format
== Format::DS
)
472 can_move_down
= false;
473 moving_interaction
|= get_barrier_interaction(candidate
.get());
474 moving_spill
|= is_spill_reload(candidate
);
475 if (!can_move_down
) {
476 for (const Operand
& op
: candidate
->operands
) {
478 ctx
.depends_on
[op
.tempId()] = true;
483 bool register_pressure_unknown
= false;
484 /* check if one of candidate's operands is killed by depending instruction */
485 for (const Operand
& op
: candidate
->operands
) {
486 if (op
.isTemp() && ctx
.depends_on
[op
.tempId()]) {
487 // FIXME: account for difference in register pressure
488 register_pressure_unknown
= true;
491 if (register_pressure_unknown
) {
492 for (const Operand
& op
: candidate
->operands
) {
494 ctx
.depends_on
[op
.tempId()] = true;
499 /* check if register pressure is low enough: the diff is negative if register pressure is decreased */
500 const RegisterDemand candidate_diff
= getLiveChanges(candidate
);
501 const RegisterDemand temp
= getTempRegisters(candidate
);;
502 if (RegisterDemand(register_pressure
- candidate_diff
).exceeds(ctx
.max_registers
))
504 const RegisterDemand temp2
= getTempRegisters(block
->instructions
[insert_idx
- 1]);
505 const RegisterDemand new_demand
= register_demand
[insert_idx
- 1] - temp2
+ temp
;
506 if (new_demand
.exceeds(ctx
.max_registers
))
508 // TODO: we might want to look further to find a sequence of instructions to move down which doesn't exceed reg pressure
510 /* move the candidate below the memory load */
511 move_element(block
->instructions
, candidate_idx
, insert_idx
);
513 /* update register pressure */
514 move_element(register_demand
, candidate_idx
, insert_idx
);
515 for (int i
= candidate_idx
; i
< insert_idx
- 1; i
++) {
516 register_demand
[i
] -= candidate_diff
;
518 register_demand
[insert_idx
- 1] = new_demand
;
519 register_pressure
-= candidate_diff
;
522 if (candidate_idx
< ctx
.last_SMEM_dep_idx
)
523 ctx
.last_SMEM_stall
++;
526 /* create the initial set of values which depend on current */
527 std::fill(ctx
.depends_on
.begin(), ctx
.depends_on
.end(), false);
528 std::fill(ctx
.RAR_dependencies
.begin(), ctx
.RAR_dependencies
.end(), false);
529 for (const Definition
& def
: current
->definitions
) {
531 ctx
.depends_on
[def
.tempId()] = true;
534 /* find the first instruction depending on current or find another VMEM */
536 moving_interaction
= barrier_none
;
537 moving_spill
= false;
539 bool found_dependency
= false;
540 /* second, check if we have instructions after current to move up */
541 for (int candidate_idx
= idx
+ 1; k
< max_moves
&& candidate_idx
< (int) idx
+ window_size
; candidate_idx
++) {
542 assert(candidate_idx
< (int) block
->instructions
.size());
543 aco_ptr
<Instruction
>& candidate
= block
->instructions
[candidate_idx
];
545 if (candidate
->opcode
== aco_opcode::p_logical_end
)
547 if (!can_move_instr(candidate
, current
, moving_interaction
))
550 const bool writes_exec
= std::any_of(candidate
->definitions
.begin(), candidate
->definitions
.end(),
551 [](const Definition
& def
) {return def
.isFixed() && def
.physReg() == exec
; });
555 /* check if candidate depends on current */
556 bool is_dependency
= !can_reorder(candidate
.get(), true) && !can_reorder_cur
;
557 for (const Operand
& op
: candidate
->operands
) {
558 if (op
.isTemp() && ctx
.depends_on
[op
.tempId()]) {
559 is_dependency
= true;
563 if (moving_spill
&& is_spill_reload(candidate
))
564 is_dependency
= true;
565 if ((moving_interaction
& barrier_shared
) && candidate
->format
== Format::DS
)
566 is_dependency
= true;
567 moving_interaction
|= get_barrier_interaction(candidate
.get());
568 moving_spill
|= is_spill_reload(candidate
);
570 for (const Definition
& def
: candidate
->definitions
) {
572 ctx
.depends_on
[def
.tempId()] = true;
574 for (const Operand
& op
: candidate
->operands
) {
576 ctx
.RAR_dependencies
[op
.tempId()] = true;
578 if (!found_dependency
) {
579 insert_idx
= candidate_idx
;
580 found_dependency
= true;
581 /* init register pressure */
582 register_pressure
= register_demand
[insert_idx
- 1];
587 /* update register pressure */
588 register_pressure
.update(register_demand
[candidate_idx
- 1]);
590 if (is_dependency
|| !found_dependency
)
592 assert(insert_idx
!= idx
);
594 bool register_pressure_unknown
= false;
595 /* check if candidate uses/kills an operand which is used by a dependency */
596 for (const Operand
& op
: candidate
->operands
) {
597 if (op
.isTemp() && ctx
.RAR_dependencies
[op
.tempId()])
598 register_pressure_unknown
= true;
600 if (register_pressure_unknown
) {
601 for (const Definition
& def
: candidate
->definitions
) {
603 ctx
.RAR_dependencies
[def
.tempId()] = true;
605 for (const Operand
& op
: candidate
->operands
) {
607 ctx
.RAR_dependencies
[op
.tempId()] = true;
612 /* check if register pressure is low enough: the diff is negative if register pressure is decreased */
613 const RegisterDemand candidate_diff
= getLiveChanges(candidate
);
614 const RegisterDemand temp
= getTempRegisters(candidate
);
615 if (RegisterDemand(register_pressure
+ candidate_diff
).exceeds(ctx
.max_registers
))
617 const RegisterDemand temp2
= getTempRegisters(block
->instructions
[insert_idx
- 1]);
618 const RegisterDemand new_demand
= register_demand
[insert_idx
- 1] - temp2
+ candidate_diff
+ temp
;
619 if (new_demand
.exceeds(ctx
.max_registers
))
622 /* move the candidate above the insert_idx */
623 move_element(block
->instructions
, candidate_idx
, insert_idx
);
625 /* update register pressure */
626 move_element(register_demand
, candidate_idx
, insert_idx
);
627 for (int i
= insert_idx
+ 1; i
<= candidate_idx
; i
++) {
628 register_demand
[i
] += candidate_diff
;
630 register_demand
[insert_idx
] = new_demand
;
631 register_pressure
+= candidate_diff
;
637 void schedule_position_export(sched_ctx
& ctx
, Block
* block
,
638 std::vector
<RegisterDemand
>& register_demand
,
639 Instruction
* current
, int idx
)
642 int window_size
= POS_EXP_WINDOW_SIZE
;
643 int max_moves
= POS_EXP_MAX_MOVES
;
646 /* create the initial set of values which current depends on */
647 std::fill(ctx
.depends_on
.begin(), ctx
.depends_on
.end(), false);
648 for (unsigned i
= 0; i
< current
->operands
.size(); i
++) {
649 if (current
->operands
[i
].isTemp())
650 ctx
.depends_on
[current
->operands
[i
].tempId()] = true;
653 /* maintain how many registers remain free when moving instructions */
654 RegisterDemand register_pressure
= register_demand
[idx
];
656 /* first, check if we have instructions before current to move down */
657 int insert_idx
= idx
+ 1;
658 int moving_interaction
= barrier_none
;
659 bool moving_spill
= false;
661 for (int candidate_idx
= idx
- 1; k
< max_moves
&& candidate_idx
> (int) idx
- window_size
; candidate_idx
--) {
662 assert(candidate_idx
>= 0);
663 aco_ptr
<Instruction
>& candidate
= block
->instructions
[candidate_idx
];
665 /* break when encountering logical_start or barriers */
666 if (candidate
->opcode
== aco_opcode::p_logical_start
)
668 if (candidate
->isVMEM() || candidate
->format
== Format::SMEM
)
670 if (!can_move_instr(candidate
, current
, moving_interaction
))
673 register_pressure
.update(register_demand
[candidate_idx
]);
675 /* if current depends on candidate, add additional dependencies and continue */
676 bool can_move_down
= true;
677 bool writes_exec
= false;
678 for (unsigned i
= 0; i
< candidate
->definitions
.size(); i
++) {
679 if (candidate
->definitions
[i
].isTemp() && ctx
.depends_on
[candidate
->definitions
[i
].tempId()])
680 can_move_down
= false;
681 if (candidate
->definitions
[i
].isFixed() && candidate
->definitions
[i
].physReg() == exec
)
687 if (moving_spill
&& is_spill_reload(candidate
))
688 can_move_down
= false;
689 if ((moving_interaction
& barrier_shared
) && candidate
->format
== Format::DS
)
690 can_move_down
= false;
691 moving_interaction
|= get_barrier_interaction(candidate
.get());
692 moving_spill
|= is_spill_reload(candidate
);
693 if (!can_move_down
) {
694 for (unsigned i
= 0; i
< candidate
->operands
.size(); i
++) {
695 if (candidate
->operands
[i
].isTemp())
696 ctx
.depends_on
[candidate
->operands
[i
].tempId()] = true;
701 bool register_pressure_unknown
= false;
702 /* check if one of candidate's operands is killed by depending instruction */
703 for (unsigned i
= 0; i
< candidate
->operands
.size(); i
++) {
704 if (candidate
->operands
[i
].isTemp() && ctx
.depends_on
[candidate
->operands
[i
].tempId()]) {
705 // FIXME: account for difference in register pressure
706 register_pressure_unknown
= true;
709 if (register_pressure_unknown
) {
710 for (unsigned i
= 0; i
< candidate
->operands
.size(); i
++) {
711 if (candidate
->operands
[i
].isTemp())
712 ctx
.depends_on
[candidate
->operands
[i
].tempId()] = true;
717 /* check if register pressure is low enough: the diff is negative if register pressure is decreased */
718 const RegisterDemand candidate_diff
= getLiveChanges(candidate
);
719 const RegisterDemand temp
= getTempRegisters(candidate
);;
720 if (RegisterDemand(register_pressure
- candidate_diff
).exceeds(ctx
.max_registers
))
722 const RegisterDemand temp2
= getTempRegisters(block
->instructions
[insert_idx
- 1]);
723 const RegisterDemand new_demand
= register_demand
[insert_idx
- 1] - temp2
+ temp
;
724 if (new_demand
.exceeds(ctx
.max_registers
))
726 // TODO: we might want to look further to find a sequence of instructions to move down which doesn't exceed reg pressure
728 /* move the candidate below the export */
729 move_element(block
->instructions
, candidate_idx
, insert_idx
);
731 /* update register pressure */
732 move_element(register_demand
, candidate_idx
, insert_idx
);
733 for (int i
= candidate_idx
; i
< insert_idx
- 1; i
++) {
734 register_demand
[i
] -= candidate_diff
;
736 register_demand
[insert_idx
- 1] = new_demand
;
737 register_pressure
-= candidate_diff
;
743 void schedule_block(sched_ctx
& ctx
, Program
*program
, Block
* block
, live
& live_vars
)
745 ctx
.last_SMEM_dep_idx
= 0;
746 ctx
.last_SMEM_stall
= INT16_MIN
;
748 /* go through all instructions and find memory loads */
749 for (unsigned idx
= 0; idx
< block
->instructions
.size(); idx
++) {
750 Instruction
* current
= block
->instructions
[idx
].get();
752 if (current
->definitions
.empty())
755 if (current
->isVMEM())
756 schedule_VMEM(ctx
, block
, live_vars
.register_demand
[block
->index
], current
, idx
);
757 if (current
->format
== Format::SMEM
)
758 schedule_SMEM(ctx
, block
, live_vars
.register_demand
[block
->index
], current
, idx
);
761 if ((program
->stage
& hw_vs
) && block
->index
== program
->blocks
.size() - 1) {
762 /* Try to move position exports as far up as possible, to reduce register
763 * usage and because ISA reference guides say so. */
764 for (unsigned idx
= 0; idx
< block
->instructions
.size(); idx
++) {
765 Instruction
* current
= block
->instructions
[idx
].get();
767 if (current
->format
== Format::EXP
) {
768 unsigned target
= static_cast<Export_instruction
*>(current
)->dest
;
769 if (target
>= V_008DFC_SQ_EXP_POS
&& target
< V_008DFC_SQ_EXP_PARAM
)
770 schedule_position_export(ctx
, block
, live_vars
.register_demand
[block
->index
], current
, idx
);
775 /* resummarize the block's register demand */
776 block
->register_demand
= RegisterDemand();
777 for (unsigned idx
= 0; idx
< block
->instructions
.size(); idx
++) {
778 block
->register_demand
.update(live_vars
.register_demand
[block
->index
][idx
]);
783 void schedule_program(Program
*program
, live
& live_vars
)
786 ctx
.depends_on
.resize(program
->peekAllocationId());
787 ctx
.RAR_dependencies
.resize(program
->peekAllocationId());
788 /* Allowing the scheduler to reduce the number of waves to as low as 5
789 * improves performance of Thrones of Britannia significantly and doesn't
790 * seem to hurt anything else. */
791 //TODO: maybe use some sort of heuristic instead
792 //TODO: this also increases window-size/max-moves? did I realize that at the time?
793 ctx
.num_waves
= std::min
<uint16_t>(program
->num_waves
, 5);
794 assert(ctx
.num_waves
);
795 uint16_t total_sgpr_regs
= program
->chip_class
>= GFX8
? 800 : 512;
796 uint16_t max_addressible_sgpr
= program
->sgpr_limit
;
797 ctx
.max_registers
= { int16_t(((256 / ctx
.num_waves
) & ~3) - 2), std::min
<int16_t>(((total_sgpr_regs
/ ctx
.num_waves
) & ~7) - 2, max_addressible_sgpr
)};
799 for (Block
& block
: program
->blocks
)
800 schedule_block(ctx
, program
, &block
, live_vars
);
802 /* update max_reg_demand and num_waves */
803 RegisterDemand new_demand
;
804 for (Block
& block
: program
->blocks
) {
805 new_demand
.update(block
.register_demand
);
807 update_vgpr_sgpr_demand(program
, new_demand
);
809 /* if enabled, this code asserts that register_demand is updated correctly */
811 int prev_num_waves
= program
->num_waves
;
812 const RegisterDemand prev_max_demand
= program
->max_reg_demand
;
814 std::vector
<RegisterDemand
> demands(program
->blocks
.size());
815 for (unsigned j
= 0; j
< program
->blocks
.size(); j
++) {
816 demands
[j
] = program
->blocks
[j
].register_demand
;
819 struct radv_nir_compiler_options options
;
820 options
.chip_class
= program
->chip_class
;
821 live live_vars2
= aco::live_var_analysis(program
, &options
);
823 for (unsigned j
= 0; j
< program
->blocks
.size(); j
++) {
824 Block
&b
= program
->blocks
[j
];
825 for (unsigned i
= 0; i
< b
.instructions
.size(); i
++)
826 assert(live_vars
.register_demand
[b
.index
][i
] == live_vars2
.register_demand
[b
.index
][i
]);
827 assert(b
.register_demand
== demands
[j
]);
830 assert(program
->max_reg_demand
== prev_max_demand
);
831 assert(program
->num_waves
== prev_num_waves
);