2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 #include "aco_builder.h"
34 * Implements the spilling algorithm on SSA-form from
35 * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
36 * by Matthias Braun and Sebastian Hack.
48 RegisterDemand target_pressure
;
50 std::vector
<std::vector
<RegisterDemand
>> register_demand
;
51 std::vector
<std::map
<Temp
, Temp
>> renames
;
52 std::vector
<std::map
<Temp
, uint32_t>> spills_entry
;
53 std::vector
<std::map
<Temp
, uint32_t>> spills_exit
;
54 std::vector
<bool> processed
;
55 std::stack
<Block
*> loop_header
;
56 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_start
;
57 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_end
;
58 std::vector
<std::pair
<RegClass
, std::set
<uint32_t>>> interferences
;
59 std::vector
<std::vector
<uint32_t>> affinities
;
60 std::vector
<bool> is_reloaded
;
61 std::map
<Temp
, remat_info
> remat
;
62 std::map
<Instruction
*, bool> remat_used
;
65 spill_ctx(const RegisterDemand target_pressure
, Program
* program
,
66 std::vector
<std::vector
<RegisterDemand
>> register_demand
)
67 : target_pressure(target_pressure
), program(program
),
68 register_demand(std::move(register_demand
)), renames(program
->blocks
.size()),
69 spills_entry(program
->blocks
.size()), spills_exit(program
->blocks
.size()),
70 processed(program
->blocks
.size(), false), wave_size(program
->wave_size
) {}
72 void add_affinity(uint32_t first
, uint32_t second
)
74 unsigned found_first
= affinities
.size();
75 unsigned found_second
= affinities
.size();
76 for (unsigned i
= 0; i
< affinities
.size(); i
++) {
77 std::vector
<uint32_t>& vec
= affinities
[i
];
78 for (uint32_t entry
: vec
) {
81 else if (entry
== second
)
85 if (found_first
== affinities
.size() && found_second
== affinities
.size()) {
86 affinities
.emplace_back(std::vector
<uint32_t>({first
, second
}));
87 } else if (found_first
< affinities
.size() && found_second
== affinities
.size()) {
88 affinities
[found_first
].push_back(second
);
89 } else if (found_second
< affinities
.size() && found_first
== affinities
.size()) {
90 affinities
[found_second
].push_back(first
);
91 } else if (found_first
!= found_second
) {
92 /* merge second into first */
93 affinities
[found_first
].insert(affinities
[found_first
].end(), affinities
[found_second
].begin(), affinities
[found_second
].end());
94 affinities
.erase(std::next(affinities
.begin(), found_second
));
96 assert(found_first
== found_second
);
100 uint32_t allocate_spill_id(RegClass rc
)
102 interferences
.emplace_back(rc
, std::set
<uint32_t>());
103 is_reloaded
.push_back(false);
104 return next_spill_id
++;
107 uint32_t next_spill_id
= 0;
110 int32_t get_dominator(int idx_a
, int idx_b
, Program
* program
, bool is_linear
)
118 while (idx_a
!= idx_b
) {
120 idx_a
= program
->blocks
[idx_a
].linear_idom
;
122 idx_b
= program
->blocks
[idx_b
].linear_idom
;
125 while (idx_a
!= idx_b
) {
127 idx_a
= program
->blocks
[idx_a
].logical_idom
;
129 idx_b
= program
->blocks
[idx_b
].logical_idom
;
136 void next_uses_per_block(spill_ctx
& ctx
, unsigned block_idx
, std::set
<uint32_t>& worklist
)
138 Block
* block
= &ctx
.program
->blocks
[block_idx
];
139 std::map
<Temp
, std::pair
<uint32_t, uint32_t>> next_uses
= ctx
.next_use_distances_end
[block_idx
];
141 /* to compute the next use distance at the beginning of the block, we have to add the block's size */
142 for (std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= next_uses
.begin(); it
!= next_uses
.end(); ++it
)
143 it
->second
.second
= it
->second
.second
+ block
->instructions
.size();
145 int idx
= block
->instructions
.size() - 1;
147 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
149 if (instr
->opcode
== aco_opcode::p_linear_phi
||
150 instr
->opcode
== aco_opcode::p_phi
)
153 for (const Definition
& def
: instr
->definitions
) {
155 next_uses
.erase(def
.getTemp());
158 for (const Operand
& op
: instr
->operands
) {
160 if (op
.isFixed() && op
.physReg() == exec
)
162 if (op
.regClass().type() == RegType::vgpr
&& op
.regClass().is_linear())
165 next_uses
[op
.getTemp()] = {block_idx
, idx
};
170 assert(block_idx
!= 0 || next_uses
.empty());
171 ctx
.next_use_distances_start
[block_idx
] = next_uses
;
173 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
174 assert(instr
->opcode
== aco_opcode::p_linear_phi
|| instr
->opcode
== aco_opcode::p_phi
);
176 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
177 unsigned pred_idx
= instr
->opcode
== aco_opcode::p_phi
?
178 block
->logical_preds
[i
] :
179 block
->linear_preds
[i
];
180 if (instr
->operands
[i
].isTemp()) {
181 if (instr
->operands
[i
].getTemp() == ctx
.program
->blocks
[pred_idx
].live_out_exec
)
183 if (ctx
.next_use_distances_end
[pred_idx
].find(instr
->operands
[i
].getTemp()) == ctx
.next_use_distances_end
[pred_idx
].end() ||
184 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] != std::pair
<uint32_t, uint32_t>{block_idx
, 0})
185 worklist
.insert(pred_idx
);
186 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] = {block_idx
, 0};
189 next_uses
.erase(instr
->definitions
[0].getTemp());
193 /* all remaining live vars must be live-out at the predecessors */
194 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: next_uses
) {
195 Temp temp
= pair
.first
;
196 uint32_t distance
= pair
.second
.second
;
197 uint32_t dom
= pair
.second
.first
;
198 std::vector
<unsigned>& preds
= temp
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
199 for (unsigned pred_idx
: preds
) {
200 if (temp
== ctx
.program
->blocks
[pred_idx
].live_out_exec
)
202 if (ctx
.program
->blocks
[pred_idx
].loop_nest_depth
> block
->loop_nest_depth
)
204 if (ctx
.next_use_distances_end
[pred_idx
].find(temp
) != ctx
.next_use_distances_end
[pred_idx
].end()) {
205 dom
= get_dominator(dom
, ctx
.next_use_distances_end
[pred_idx
][temp
].first
, ctx
.program
, temp
.is_linear());
206 distance
= std::min(ctx
.next_use_distances_end
[pred_idx
][temp
].second
, distance
);
208 if (ctx
.next_use_distances_end
[pred_idx
][temp
] != std::pair
<uint32_t, uint32_t>{dom
, distance
})
209 worklist
.insert(pred_idx
);
210 ctx
.next_use_distances_end
[pred_idx
][temp
] = {dom
, distance
};
216 void compute_global_next_uses(spill_ctx
& ctx
, std::vector
<std::set
<Temp
>>& live_out
)
218 ctx
.next_use_distances_start
.resize(ctx
.program
->blocks
.size());
219 ctx
.next_use_distances_end
.resize(ctx
.program
->blocks
.size());
220 std::set
<uint32_t> worklist
;
221 for (Block
& block
: ctx
.program
->blocks
)
222 worklist
.insert(block
.index
);
224 while (!worklist
.empty()) {
225 std::set
<unsigned>::reverse_iterator b_it
= worklist
.rbegin();
226 unsigned block_idx
= *b_it
;
227 worklist
.erase(block_idx
);
228 next_uses_per_block(ctx
, block_idx
, worklist
);
232 bool should_rematerialize(aco_ptr
<Instruction
>& instr
)
234 /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
235 if (instr
->format
!= Format::VOP1
&& instr
->format
!= Format::SOP1
&& instr
->format
!= Format::PSEUDO
&& instr
->format
!= Format::SOPK
)
237 /* TODO: pseudo-instruction rematerialization is only supported for p_create_vector */
238 if (instr
->format
== Format::PSEUDO
&& instr
->opcode
!= aco_opcode::p_create_vector
)
240 if (instr
->format
== Format::SOPK
&& instr
->opcode
!= aco_opcode::s_movk_i32
)
243 for (const Operand
& op
: instr
->operands
) {
244 /* TODO: rematerialization using temporaries isn't yet supported */
249 /* TODO: rematerialization with multiple definitions isn't yet supported */
250 if (instr
->definitions
.size() > 1)
256 aco_ptr
<Instruction
> do_reload(spill_ctx
& ctx
, Temp tmp
, Temp new_name
, uint32_t spill_id
)
258 std::map
<Temp
, remat_info
>::iterator remat
= ctx
.remat
.find(tmp
);
259 if (remat
!= ctx
.remat
.end()) {
260 Instruction
*instr
= remat
->second
.instr
;
261 assert((instr
->format
== Format::VOP1
|| instr
->format
== Format::SOP1
|| instr
->format
== Format::PSEUDO
|| instr
->format
== Format::SOPK
) && "unsupported");
262 assert((instr
->format
!= Format::PSEUDO
|| instr
->opcode
== aco_opcode::p_create_vector
) && "unsupported");
263 assert(instr
->definitions
.size() == 1 && "unsupported");
265 aco_ptr
<Instruction
> res
;
266 if (instr
->format
== Format::VOP1
) {
267 res
.reset(create_instruction
<VOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
268 } else if (instr
->format
== Format::SOP1
) {
269 res
.reset(create_instruction
<SOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
270 } else if (instr
->format
== Format::PSEUDO
) {
271 res
.reset(create_instruction
<Pseudo_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
272 } else if (instr
->format
== Format::SOPK
) {
273 res
.reset(create_instruction
<SOPK_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
274 static_cast<SOPK_instruction
*>(res
.get())->imm
= static_cast<SOPK_instruction
*>(instr
)->imm
;
276 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
277 res
->operands
[i
] = instr
->operands
[i
];
278 if (instr
->operands
[i
].isTemp()) {
279 assert(false && "unsupported");
280 if (ctx
.remat
.count(instr
->operands
[i
].getTemp()))
281 ctx
.remat_used
[ctx
.remat
[instr
->operands
[i
].getTemp()].instr
] = true;
284 res
->definitions
[0] = Definition(new_name
);
287 aco_ptr
<Pseudo_instruction
> reload
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 1, 1)};
288 reload
->operands
[0] = Operand(spill_id
);
289 reload
->definitions
[0] = Definition(new_name
);
290 ctx
.is_reloaded
[spill_id
] = true;
295 void get_rematerialize_info(spill_ctx
& ctx
)
297 for (Block
& block
: ctx
.program
->blocks
) {
298 bool logical
= false;
299 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
300 if (instr
->opcode
== aco_opcode::p_logical_start
)
302 else if (instr
->opcode
== aco_opcode::p_logical_end
)
304 if (logical
&& should_rematerialize(instr
)) {
305 for (const Definition
& def
: instr
->definitions
) {
307 ctx
.remat
[def
.getTemp()] = (remat_info
){instr
.get()};
308 ctx
.remat_used
[instr
.get()] = false;
316 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(spill_ctx
& ctx
, Block
* block
)
318 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(block
->instructions
.size());
320 std::map
<Temp
, uint32_t> next_uses
;
321 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block
->index
])
322 next_uses
[pair
.first
] = pair
.second
.second
+ block
->instructions
.size();
324 for (int idx
= block
->instructions
.size() - 1; idx
>= 0; idx
--) {
325 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
328 if (instr
->opcode
== aco_opcode::p_phi
|| instr
->opcode
== aco_opcode::p_linear_phi
)
331 for (const Operand
& op
: instr
->operands
) {
332 if (op
.isFixed() && op
.physReg() == exec
)
334 if (op
.regClass().type() == RegType::vgpr
&& op
.regClass().is_linear())
337 next_uses
[op
.getTemp()] = idx
;
339 for (const Definition
& def
: instr
->definitions
) {
341 next_uses
.erase(def
.getTemp());
343 local_next_uses
[idx
] = next_uses
;
345 return local_next_uses
;
349 RegisterDemand
init_live_in_vars(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
351 RegisterDemand spilled_registers
;
353 /* first block, nothing was spilled before */
357 /* loop header block */
358 if (block
->loop_nest_depth
> ctx
.program
->blocks
[block_idx
- 1].loop_nest_depth
) {
359 assert(block
->linear_preds
[0] == block_idx
- 1);
360 assert(block
->logical_preds
[0] == block_idx
- 1);
362 /* create new loop_info */
363 ctx
.loop_header
.emplace(block
);
365 /* check how many live-through variables should be spilled */
366 RegisterDemand new_demand
;
367 unsigned i
= block_idx
;
368 while (ctx
.program
->blocks
[i
].loop_nest_depth
>= block
->loop_nest_depth
) {
369 assert(ctx
.program
->blocks
.size() > i
);
370 new_demand
.update(ctx
.program
->blocks
[i
].register_demand
);
373 unsigned loop_end
= i
;
375 /* select live-through vgpr variables */
376 while (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
377 unsigned distance
= 0;
379 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
380 if (pair
.first
.type() == RegType::vgpr
&&
381 pair
.second
.first
>= loop_end
&&
382 pair
.second
.second
> distance
&&
383 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
384 to_spill
= pair
.first
;
385 distance
= pair
.second
.second
;
392 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
393 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
395 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
398 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
399 spilled_registers
.vgpr
+= to_spill
.size();
402 /* select live-through sgpr variables */
403 while (new_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
404 unsigned distance
= 0;
406 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
407 if (pair
.first
.type() == RegType::sgpr
&&
408 pair
.second
.first
>= loop_end
&&
409 pair
.second
.second
> distance
&&
410 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
411 to_spill
= pair
.first
;
412 distance
= pair
.second
.second
;
419 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
420 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
422 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
425 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
426 spilled_registers
.sgpr
+= to_spill
.size();
432 if (!RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
))
433 return spilled_registers
;
435 /* if reg pressure is too high at beginning of loop, add variables with furthest use */
437 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
|| block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
)
440 assert(idx
!= 0 && "loop without phis: TODO");
442 RegisterDemand reg_pressure
= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
443 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
444 unsigned distance
= 0;
446 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
447 if (pair
.first
.type() == RegType::sgpr
&&
448 pair
.second
.second
> distance
&&
449 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
450 to_spill
= pair
.first
;
451 distance
= pair
.second
.second
;
454 assert(distance
!= 0);
456 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
457 spilled_registers
.sgpr
+= to_spill
.size();
458 reg_pressure
.sgpr
-= to_spill
.size();
460 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
461 unsigned distance
= 0;
463 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
464 if (pair
.first
.type() == RegType::vgpr
&&
465 pair
.second
.second
> distance
&&
466 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
467 to_spill
= pair
.first
;
468 distance
= pair
.second
.second
;
471 assert(distance
!= 0);
472 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
473 spilled_registers
.vgpr
+= to_spill
.size();
474 reg_pressure
.vgpr
-= to_spill
.size();
477 return spilled_registers
;
481 if (block
->linear_preds
.size() == 1 && !(block
->kind
& block_kind_loop_exit
)) {
482 /* keep variables spilled if they are alive and not used in the current block */
483 unsigned pred_idx
= block
->linear_preds
[0];
484 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
485 if (pair
.first
.type() == RegType::sgpr
&&
486 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
487 ctx
.next_use_distances_start
[block_idx
][pair
.first
].second
> block_idx
) {
488 ctx
.spills_entry
[block_idx
].insert(pair
);
489 spilled_registers
.sgpr
+= pair
.first
.size();
492 if (block
->logical_preds
.size() == 1) {
493 pred_idx
= block
->logical_preds
[0];
494 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
495 if (pair
.first
.type() == RegType::vgpr
&&
496 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
497 ctx
.next_use_distances_start
[block_idx
][pair
.first
].second
> block_idx
) {
498 ctx
.spills_entry
[block_idx
].insert(pair
);
499 spilled_registers
.vgpr
+= pair
.first
.size();
504 /* if register demand is still too high, we just keep all spilled live vars and process the block */
505 if (block
->register_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
506 pred_idx
= block
->linear_preds
[0];
507 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
508 if (pair
.first
.type() == RegType::sgpr
&&
509 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
510 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
511 spilled_registers
.sgpr
+= pair
.first
.size();
515 if (block
->register_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
&& block
->logical_preds
.size() == 1) {
516 pred_idx
= block
->logical_preds
[0];
517 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
518 if (pair
.first
.type() == RegType::vgpr
&&
519 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
520 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
521 spilled_registers
.vgpr
+= pair
.first
.size();
526 return spilled_registers
;
529 /* else: merge block */
530 std::set
<Temp
> partial_spills
;
532 /* keep variables spilled on all incoming paths */
533 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
534 std::vector
<unsigned>& preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
535 /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload it.
536 * Otherwise, if any predecessor reloads it, ensure it's reloaded on all other predecessors.
537 * The idea is that it's better in practice to rematerialize redundantly than to create lots of phis. */
538 /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db doesn't seem to exercise this path much) */
539 bool remat
= ctx
.remat
.count(pair
.first
);
541 uint32_t spill_id
= 0;
542 for (unsigned pred_idx
: preds
) {
543 /* variable is not even live at the predecessor: probably from a phi */
544 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end()) {
548 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
552 partial_spills
.insert(pair
.first
);
553 /* it might be that on one incoming path, the variable has a different spill_id, but add_couple_code() will take care of that. */
554 spill_id
= ctx
.spills_exit
[pred_idx
][pair
.first
];
560 ctx
.spills_entry
[block_idx
][pair
.first
] = spill_id
;
561 partial_spills
.erase(pair
.first
);
562 spilled_registers
+= pair
.first
;
568 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
||
569 block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
) {
570 aco_ptr
<Instruction
>& phi
= block
->instructions
[idx
];
571 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
574 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
575 if (phi
->operands
[i
].isUndefined())
577 assert(phi
->operands
[i
].isTemp());
578 if (ctx
.spills_exit
[preds
[i
]].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[preds
[i
]].end())
581 partial_spills
.insert(phi
->definitions
[0].getTemp());
584 ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()] = ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
585 partial_spills
.erase(phi
->definitions
[0].getTemp());
586 spilled_registers
+= phi
->definitions
[0].getTemp();
592 /* if reg pressure at first instruction is still too high, add partially spilled variables */
593 RegisterDemand reg_pressure
;
595 for (const Definition
& def
: block
->instructions
[idx
]->definitions
) {
597 reg_pressure
-= def
.getTemp();
600 for (const Operand
& op
: block
->instructions
[idx
]->operands
) {
601 if (op
.isTemp() && op
.isFirstKill()) {
602 reg_pressure
+= op
.getTemp();
608 reg_pressure
+= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
610 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
611 assert(!partial_spills
.empty());
613 std::set
<Temp
>::iterator it
= partial_spills
.begin();
615 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
616 while (it
!= partial_spills
.end()) {
617 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
619 if (it
->type() == RegType::sgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
620 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
625 assert(distance
!= 0);
627 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
628 partial_spills
.erase(to_spill
);
629 spilled_registers
.sgpr
+= to_spill
.size();
630 reg_pressure
.sgpr
-= to_spill
.size();
633 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
634 assert(!partial_spills
.empty());
636 std::set
<Temp
>::iterator it
= partial_spills
.begin();
638 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
639 while (it
!= partial_spills
.end()) {
640 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
642 if (it
->type() == RegType::vgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
643 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
648 assert(distance
!= 0);
650 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
651 partial_spills
.erase(to_spill
);
652 spilled_registers
.vgpr
+= to_spill
.size();
653 reg_pressure
.vgpr
-= to_spill
.size();
656 return spilled_registers
;
660 RegisterDemand
get_demand_before(spill_ctx
& ctx
, unsigned block_idx
, unsigned idx
)
663 RegisterDemand demand_before
= ctx
.register_demand
[block_idx
][idx
];
664 aco_ptr
<Instruction
>& instr
= ctx
.program
->blocks
[block_idx
].instructions
[idx
];
665 for (const Definition
& def
: instr
->definitions
)
666 demand_before
-= def
.getTemp();
667 for (const Operand
& op
: instr
->operands
) {
668 if (op
.isFirstKill())
669 demand_before
+= op
.getTemp();
671 return demand_before
;
673 return ctx
.register_demand
[block_idx
][idx
- 1];
677 void add_coupling_code(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
679 /* no coupling code necessary */
680 if (block
->linear_preds
.size() == 0)
683 std::vector
<aco_ptr
<Instruction
>> instructions
;
684 /* branch block: TODO take other branch into consideration */
685 if (block
->linear_preds
.size() == 1 && !(block
->kind
& (block_kind_loop_exit
| block_kind_loop_header
))) {
686 assert(ctx
.processed
[block
->linear_preds
[0]]);
687 assert(ctx
.register_demand
[block_idx
].size() == block
->instructions
.size());
688 std::vector
<RegisterDemand
> reg_demand
;
689 unsigned insert_idx
= 0;
690 unsigned pred_idx
= block
->linear_preds
[0];
691 RegisterDemand demand_before
= get_demand_before(ctx
, block_idx
, 0);
693 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
694 if (!live
.first
.is_linear())
697 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
700 /* in register at end of predecessor */
701 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
702 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
703 if (it
!= ctx
.renames
[pred_idx
].end())
704 ctx
.renames
[block_idx
].insert(*it
);
708 /* variable is spilled at predecessor and live at current block: create reload instruction */
709 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
710 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
711 instructions
.emplace_back(std::move(reload
));
712 reg_demand
.push_back(demand_before
);
713 ctx
.renames
[block_idx
][live
.first
] = new_name
;
716 if (block
->logical_preds
.size() == 1) {
718 assert(insert_idx
< block
->instructions
.size());
719 instructions
.emplace_back(std::move(block
->instructions
[insert_idx
]));
720 reg_demand
.push_back(ctx
.register_demand
[block_idx
][insert_idx
]);
722 } while (instructions
.back()->opcode
!= aco_opcode::p_logical_start
);
724 unsigned pred_idx
= block
->logical_preds
[0];
725 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
726 if (live
.first
.is_linear())
729 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
732 /* in register at end of predecessor */
733 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
734 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
735 if (it
!= ctx
.renames
[pred_idx
].end())
736 ctx
.renames
[block_idx
].insert(*it
);
740 /* variable is spilled at predecessor and live at current block: create reload instruction */
741 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
742 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
743 instructions
.emplace_back(std::move(reload
));
744 reg_demand
.emplace_back(reg_demand
.back());
745 ctx
.renames
[block_idx
][live
.first
] = new_name
;
749 /* combine new reload instructions with original block */
750 if (!instructions
.empty()) {
751 reg_demand
.insert(reg_demand
.end(), std::next(ctx
.register_demand
[block
->index
].begin(), insert_idx
),
752 ctx
.register_demand
[block
->index
].end());
753 ctx
.register_demand
[block_idx
] = std::move(reg_demand
);
754 instructions
.insert(instructions
.end(),
755 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(std::next(block
->instructions
.begin(), insert_idx
)),
756 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
757 block
->instructions
= std::move(instructions
);
762 /* loop header and merge blocks: check if all (linear) predecessors have been processed */
763 for (ASSERTED
unsigned pred
: block
->linear_preds
)
764 assert(ctx
.processed
[pred
]);
766 /* iterate the phi nodes for which operands to spill at the predecessor */
767 for (aco_ptr
<Instruction
>& phi
: block
->instructions
) {
768 if (phi
->opcode
!= aco_opcode::p_phi
&&
769 phi
->opcode
!= aco_opcode::p_linear_phi
)
772 /* if the phi is not spilled, add to instructions */
773 if (ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end()) {
774 instructions
.emplace_back(std::move(phi
));
778 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
779 uint32_t def_spill_id
= ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()];
781 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
782 if (phi
->operands
[i
].isUndefined())
785 unsigned pred_idx
= preds
[i
];
786 assert(phi
->operands
[i
].isTemp() && phi
->operands
[i
].isKill());
787 Temp var
= phi
->operands
[i
].getTemp();
789 /* build interferences between the phi def and all spilled variables at the predecessor blocks */
790 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
791 if (var
== pair
.first
)
793 ctx
.interferences
[def_spill_id
].second
.emplace(pair
.second
);
794 ctx
.interferences
[pair
.second
].second
.emplace(def_spill_id
);
797 /* check if variable is already spilled at predecessor */
798 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(var
);
799 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
800 if (spilled
->second
!= def_spill_id
)
801 ctx
.add_affinity(def_spill_id
, spilled
->second
);
805 /* rename if necessary */
806 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
807 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
808 var
= rename_it
->second
;
809 ctx
.renames
[pred_idx
].erase(rename_it
);
812 uint32_t spill_id
= ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
813 ctx
.add_affinity(def_spill_id
, spill_id
);
814 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
815 spill
->operands
[0] = Operand(var
);
816 spill
->operands
[1] = Operand(spill_id
);
817 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
818 unsigned idx
= pred
.instructions
.size();
822 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
823 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
824 pred
.instructions
.insert(it
, std::move(spill
));
825 ctx
.spills_exit
[pred_idx
][phi
->operands
[i
].getTemp()] = spill_id
;
828 /* remove phi from instructions */
832 /* iterate all (other) spilled variables for which to spill at the predecessor */
833 // TODO: would be better to have them sorted: first vgprs and first with longest distance
834 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_entry
[block_idx
]) {
835 std::vector
<unsigned> preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
837 for (unsigned pred_idx
: preds
) {
838 /* variable is already spilled at predecessor */
839 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(pair
.first
);
840 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
841 if (spilled
->second
!= pair
.second
)
842 ctx
.add_affinity(pair
.second
, spilled
->second
);
846 /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */
847 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
850 /* add interferences between spilled variable and predecessors exit spills */
851 for (std::pair
<Temp
, uint32_t> exit_spill
: ctx
.spills_exit
[pred_idx
]) {
852 if (exit_spill
.first
== pair
.first
)
854 ctx
.interferences
[exit_spill
.second
].second
.emplace(pair
.second
);
855 ctx
.interferences
[pair
.second
].second
.emplace(exit_spill
.second
);
858 /* variable is in register at predecessor and has to be spilled */
859 /* rename if necessary */
860 Temp var
= pair
.first
;
861 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
862 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
863 var
= rename_it
->second
;
864 ctx
.renames
[pred_idx
].erase(rename_it
);
867 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
868 spill
->operands
[0] = Operand(var
);
869 spill
->operands
[1] = Operand(pair
.second
);
870 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
871 unsigned idx
= pred
.instructions
.size();
875 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
876 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
877 pred
.instructions
.insert(it
, std::move(spill
));
878 ctx
.spills_exit
[pred
.index
][pair
.first
] = pair
.second
;
882 /* iterate phis for which operands to reload */
883 for (aco_ptr
<Instruction
>& phi
: instructions
) {
884 assert(phi
->opcode
== aco_opcode::p_phi
|| phi
->opcode
== aco_opcode::p_linear_phi
);
885 assert(ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end());
887 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
888 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
889 if (!phi
->operands
[i
].isTemp())
891 unsigned pred_idx
= preds
[i
];
894 if (ctx
.spills_exit
[pred_idx
].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[pred_idx
].end()) {
895 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(phi
->operands
[i
].getTemp());
896 if (it
!= ctx
.renames
[pred_idx
].end())
897 phi
->operands
[i
].setTemp(it
->second
);
901 Temp tmp
= phi
->operands
[i
].getTemp();
903 /* reload phi operand at end of predecessor block */
904 Temp new_name
= {ctx
.program
->allocateId(), tmp
.regClass()};
905 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
906 unsigned idx
= pred
.instructions
.size();
910 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
911 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
913 aco_ptr
<Instruction
> reload
= do_reload(ctx
, tmp
, new_name
, ctx
.spills_exit
[pred_idx
][tmp
]);
914 pred
.instructions
.insert(it
, std::move(reload
));
916 ctx
.spills_exit
[pred_idx
].erase(tmp
);
917 ctx
.renames
[pred_idx
][tmp
] = new_name
;
918 phi
->operands
[i
].setTemp(new_name
);
922 /* iterate live variables for which to reload */
923 // TODO: reload at current block if variable is spilled on all predecessors
924 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
925 /* skip spilled variables */
926 if (ctx
.spills_entry
[block_idx
].find(pair
.first
) != ctx
.spills_entry
[block_idx
].end())
928 std::vector
<unsigned> preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
930 /* variable is dead at predecessor, it must be from a phi */
931 bool is_dead
= false;
932 for (unsigned pred_idx
: preds
) {
933 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
938 for (unsigned pred_idx
: preds
) {
939 /* the variable is not spilled at the predecessor */
940 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end())
943 /* variable is spilled at predecessor and has to be reloaded */
944 Temp new_name
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
945 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
946 unsigned idx
= pred
.instructions
.size();
950 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
951 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
953 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.first
, new_name
, ctx
.spills_exit
[pred
.index
][pair
.first
]);
954 pred
.instructions
.insert(it
, std::move(reload
));
956 ctx
.spills_exit
[pred
.index
].erase(pair
.first
);
957 ctx
.renames
[pred
.index
][pair
.first
] = new_name
;
960 /* check if we have to create a new phi for this variable */
961 Temp rename
= Temp();
963 for (unsigned pred_idx
: preds
) {
964 if (ctx
.renames
[pred_idx
].find(pair
.first
) == ctx
.renames
[pred_idx
].end()) {
965 if (rename
== Temp())
968 is_same
= rename
== pair
.first
;
970 if (rename
== Temp())
971 rename
= ctx
.renames
[pred_idx
][pair
.first
];
973 is_same
= rename
== ctx
.renames
[pred_idx
][pair
.first
];
981 /* the variable was renamed differently in the predecessors: we have to create a phi */
982 aco_opcode opcode
= pair
.first
.is_linear() ? aco_opcode::p_linear_phi
: aco_opcode::p_phi
;
983 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(opcode
, Format::PSEUDO
, preds
.size(), 1)};
984 rename
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
985 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
987 if (ctx
.renames
[preds
[i
]].find(pair
.first
) != ctx
.renames
[preds
[i
]].end())
988 tmp
= ctx
.renames
[preds
[i
]][pair
.first
];
989 else if (preds
[i
] >= block_idx
)
993 phi
->operands
[i
] = Operand(tmp
);
995 phi
->definitions
[0] = Definition(rename
);
996 instructions
.emplace_back(std::move(phi
));
999 /* the variable was renamed: add new name to renames */
1000 if (!(rename
== Temp() || rename
== pair
.first
))
1001 ctx
.renames
[block_idx
][pair
.first
] = rename
;
1004 /* combine phis with instructions */
1006 while (!block
->instructions
[idx
]) {
1010 if (!ctx
.processed
[block_idx
]) {
1011 assert(!(block
->kind
& block_kind_loop_header
));
1012 RegisterDemand demand_before
= get_demand_before(ctx
, block_idx
, idx
);
1013 ctx
.register_demand
[block
->index
].erase(ctx
.register_demand
[block
->index
].begin(), ctx
.register_demand
[block
->index
].begin() + idx
);
1014 ctx
.register_demand
[block
->index
].insert(ctx
.register_demand
[block
->index
].begin(), instructions
.size(), demand_before
);
1017 std::vector
<aco_ptr
<Instruction
>>::iterator start
= std::next(block
->instructions
.begin(), idx
);
1018 instructions
.insert(instructions
.end(), std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(start
),
1019 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
1020 block
->instructions
= std::move(instructions
);
1023 void process_block(spill_ctx
& ctx
, unsigned block_idx
, Block
* block
,
1024 std::map
<Temp
, uint32_t> ¤t_spills
, RegisterDemand spilled_registers
)
1026 assert(!ctx
.processed
[block_idx
]);
1028 std::vector
<std::map
<Temp
, uint32_t>> local_next_use_distance
;
1029 std::vector
<aco_ptr
<Instruction
>> instructions
;
1032 /* phis are handled separetely */
1033 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
||
1034 block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
) {
1035 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
1036 for (const Operand
& op
: instr
->operands
) {
1037 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
1038 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
1039 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1041 instructions
.emplace_back(std::move(instr
));
1045 if (block
->register_demand
.exceeds(ctx
.target_pressure
))
1046 local_next_use_distance
= local_next_uses(ctx
, block
);
1048 while (idx
< block
->instructions
.size()) {
1049 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
1051 std::map
<Temp
, std::pair
<Temp
, uint32_t>> reloads
;
1052 std::map
<Temp
, uint32_t> spills
;
1053 /* rename and reload operands */
1054 for (Operand
& op
: instr
->operands
) {
1057 if (current_spills
.find(op
.getTemp()) == current_spills
.end()) {
1058 /* the Operand is in register: check if it was renamed */
1059 if (ctx
.renames
[block_idx
].find(op
.getTemp()) != ctx
.renames
[block_idx
].end())
1060 op
.setTemp(ctx
.renames
[block_idx
][op
.getTemp()]);
1061 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
1062 if (ctx
.remat
.count(op
.getTemp()))
1063 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1066 /* the Operand is spilled: add it to reloads */
1067 Temp new_tmp
= {ctx
.program
->allocateId(), op
.regClass()};
1068 ctx
.renames
[block_idx
][op
.getTemp()] = new_tmp
;
1069 reloads
[new_tmp
] = std::make_pair(op
.getTemp(), current_spills
[op
.getTemp()]);
1070 current_spills
.erase(op
.getTemp());
1071 op
.setTemp(new_tmp
);
1072 spilled_registers
-= new_tmp
;
1075 /* check if register demand is low enough before and after the current instruction */
1076 if (block
->register_demand
.exceeds(ctx
.target_pressure
)) {
1078 RegisterDemand new_demand
= ctx
.register_demand
[block_idx
][idx
];
1079 new_demand
.update(get_demand_before(ctx
, block_idx
, idx
));
1081 assert(!local_next_use_distance
.empty());
1083 /* if reg pressure is too high, spill variable with furthest next use */
1084 while (RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
)) {
1085 unsigned distance
= 0;
1087 bool do_rematerialize
= false;
1088 if (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
1089 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1090 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1091 if (pair
.first
.type() == RegType::vgpr
&&
1092 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1093 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1094 current_spills
.find(pair
.first
) == current_spills
.end() &&
1095 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1096 to_spill
= pair
.first
;
1097 distance
= pair
.second
;
1098 do_rematerialize
= can_rematerialize
;
1102 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1103 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1104 if (pair
.first
.type() == RegType::sgpr
&&
1105 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1106 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1107 current_spills
.find(pair
.first
) == current_spills
.end() &&
1108 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1109 to_spill
= pair
.first
;
1110 distance
= pair
.second
;
1111 do_rematerialize
= can_rematerialize
;
1116 assert(distance
!= 0 && distance
> idx
);
1117 uint32_t spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
1119 /* add interferences with currently spilled variables */
1120 for (std::pair
<Temp
, uint32_t> pair
: current_spills
) {
1121 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
);
1122 ctx
.interferences
[pair
.second
].second
.emplace(spill_id
);
1124 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1125 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
.second
);
1126 ctx
.interferences
[pair
.second
.second
].second
.emplace(spill_id
);
1129 current_spills
[to_spill
] = spill_id
;
1130 spilled_registers
+= to_spill
;
1132 /* rename if necessary */
1133 if (ctx
.renames
[block_idx
].find(to_spill
) != ctx
.renames
[block_idx
].end()) {
1134 to_spill
= ctx
.renames
[block_idx
][to_spill
];
1137 /* add spill to new instructions */
1138 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
1139 spill
->operands
[0] = Operand(to_spill
);
1140 spill
->operands
[1] = Operand(spill_id
);
1141 instructions
.emplace_back(std::move(spill
));
1145 /* add reloads and instruction to new instructions */
1146 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1147 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.second
.first
, pair
.first
, pair
.second
.second
);
1148 instructions
.emplace_back(std::move(reload
));
1150 instructions
.emplace_back(std::move(instr
));
1154 block
->instructions
= std::move(instructions
);
1155 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1158 void spill_block(spill_ctx
& ctx
, unsigned block_idx
)
1160 Block
* block
= &ctx
.program
->blocks
[block_idx
];
1162 /* determine set of variables which are spilled at the beginning of the block */
1163 RegisterDemand spilled_registers
= init_live_in_vars(ctx
, block
, block_idx
);
1165 /* add interferences for spilled variables */
1166 for (std::pair
<Temp
, uint32_t> x
: ctx
.spills_entry
[block_idx
]) {
1167 for (std::pair
<Temp
, uint32_t> y
: ctx
.spills_entry
[block_idx
])
1168 if (x
.second
!= y
.second
)
1169 ctx
.interferences
[x
.second
].second
.emplace(y
.second
);
1172 bool is_loop_header
= block
->loop_nest_depth
&& ctx
.loop_header
.top()->index
== block_idx
;
1173 if (!is_loop_header
) {
1174 /* add spill/reload code on incoming control flow edges */
1175 add_coupling_code(ctx
, block
, block_idx
);
1178 std::map
<Temp
, uint32_t> current_spills
= ctx
.spills_entry
[block_idx
];
1180 /* check conditions to process this block */
1181 bool process
= RegisterDemand(block
->register_demand
- spilled_registers
).exceeds(ctx
.target_pressure
) ||
1182 !ctx
.renames
[block_idx
].empty() ||
1183 ctx
.remat_used
.size();
1185 std::map
<Temp
, uint32_t>::iterator it
= current_spills
.begin();
1186 while (!process
&& it
!= current_spills
.end()) {
1187 if (ctx
.next_use_distances_start
[block_idx
][it
->first
].first
== block_idx
)
1193 process_block(ctx
, block_idx
, block
, current_spills
, spilled_registers
);
1195 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1197 ctx
.processed
[block_idx
] = true;
1199 /* check if the next block leaves the current loop */
1200 if (block
->loop_nest_depth
== 0 || ctx
.program
->blocks
[block_idx
+ 1].loop_nest_depth
>= block
->loop_nest_depth
)
1203 Block
* loop_header
= ctx
.loop_header
.top();
1205 /* preserve original renames at end of loop header block */
1206 std::map
<Temp
, Temp
> renames
= std::move(ctx
.renames
[loop_header
->index
]);
1208 /* add coupling code to all loop header predecessors */
1209 add_coupling_code(ctx
, loop_header
, loop_header
->index
);
1211 /* update remat_used for phis added in add_coupling_code() */
1212 for (aco_ptr
<Instruction
>& instr
: loop_header
->instructions
) {
1215 for (const Operand
& op
: instr
->operands
) {
1216 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
1217 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1221 /* propagate new renames through loop: i.e. repair the SSA */
1222 renames
.swap(ctx
.renames
[loop_header
->index
]);
1223 for (std::pair
<Temp
, Temp
> rename
: renames
) {
1224 for (unsigned idx
= loop_header
->index
; idx
<= block_idx
; idx
++) {
1225 Block
& current
= ctx
.program
->blocks
[idx
];
1226 std::vector
<aco_ptr
<Instruction
>>::iterator instr_it
= current
.instructions
.begin();
1228 /* first rename phis */
1229 while (instr_it
!= current
.instructions
.end()) {
1230 aco_ptr
<Instruction
>& phi
= *instr_it
;
1231 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
1233 /* no need to rename the loop header phis once again. this happened in add_coupling_code() */
1234 if (idx
== loop_header
->index
) {
1239 for (Operand
& op
: phi
->operands
) {
1242 if (op
.getTemp() == rename
.first
)
1243 op
.setTemp(rename
.second
);
1248 std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= ctx
.next_use_distances_start
[idx
].find(rename
.first
);
1250 /* variable is not live at beginning of this block */
1251 if (it
== ctx
.next_use_distances_start
[idx
].end())
1254 /* if the variable is live at the block's exit, add rename */
1255 if (ctx
.next_use_distances_end
[idx
].find(rename
.first
) != ctx
.next_use_distances_end
[idx
].end())
1256 ctx
.renames
[idx
].insert(rename
);
1258 /* rename all uses in this block */
1259 bool renamed
= false;
1260 while (!renamed
&& instr_it
!= current
.instructions
.end()) {
1261 aco_ptr
<Instruction
>& instr
= *instr_it
;
1262 for (Operand
& op
: instr
->operands
) {
1265 if (op
.getTemp() == rename
.first
) {
1266 op
.setTemp(rename
.second
);
1267 /* we can stop with this block as soon as the variable is spilled */
1268 if (instr
->opcode
== aco_opcode::p_spill
)
1277 /* remove loop header info from stack */
1278 ctx
.loop_header
.pop();
1281 Temp
load_scratch_resource(spill_ctx
& ctx
, Temp
& scratch_offset
,
1282 std::vector
<aco_ptr
<Instruction
>>& instructions
,
1283 unsigned offset
, bool is_top_level
)
1285 Builder
bld(ctx
.program
);
1287 bld
.reset(&instructions
);
1289 /* find p_logical_end */
1290 unsigned idx
= instructions
.size() - 1;
1291 while (instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
)
1293 bld
.reset(&instructions
, std::next(instructions
.begin(), idx
));
1296 Temp private_segment_buffer
= ctx
.program
->private_segment_buffer
;
1297 if (ctx
.program
->stage
!= compute_cs
)
1298 private_segment_buffer
= bld
.smem(aco_opcode::s_load_dwordx2
, bld
.def(s2
), private_segment_buffer
, Operand(0u));
1301 scratch_offset
= bld
.sop2(aco_opcode::s_add_u32
, bld
.def(s1
), bld
.def(s1
, scc
), scratch_offset
, Operand(offset
));
1303 uint32_t rsrc_conf
= S_008F0C_ADD_TID_ENABLE(1) |
1304 S_008F0C_INDEX_STRIDE(ctx
.program
->wave_size
== 64 ? 3 : 2);
1306 if (ctx
.program
->chip_class
>= GFX10
) {
1307 rsrc_conf
|= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
1308 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW
) |
1309 S_008F0C_RESOURCE_LEVEL(1);
1310 } else if (ctx
.program
->chip_class
<= GFX7
) { /* dfmt modifies stride on GFX8/GFX9 when ADD_TID_EN=1 */
1311 rsrc_conf
|= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
1312 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
1314 /* older generations need element size = 4 bytes. element size removed in GFX9 */
1315 if (ctx
.program
->chip_class
<= GFX8
)
1316 rsrc_conf
|= S_008F0C_ELEMENT_SIZE(1);
1318 return bld
.pseudo(aco_opcode::p_create_vector
, bld
.def(s4
),
1319 private_segment_buffer
, Operand(-1u),
1320 Operand(rsrc_conf
));
1323 void assign_spill_slots(spill_ctx
& ctx
, unsigned spills_to_vgpr
) {
1324 std::map
<uint32_t, uint32_t> sgpr_slot
;
1325 std::map
<uint32_t, uint32_t> vgpr_slot
;
1326 std::vector
<bool> is_assigned(ctx
.interferences
.size());
1328 /* first, handle affinities: just merge all interferences into both spill ids */
1329 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1330 for (unsigned i
= 0; i
< vec
.size(); i
++) {
1331 for (unsigned j
= i
+ 1; j
< vec
.size(); j
++) {
1332 assert(vec
[i
] != vec
[j
]);
1333 for (uint32_t id
: ctx
.interferences
[vec
[i
]].second
)
1334 ctx
.interferences
[id
].second
.insert(vec
[j
]);
1335 for (uint32_t id
: ctx
.interferences
[vec
[j
]].second
)
1336 ctx
.interferences
[id
].second
.insert(vec
[i
]);
1337 ctx
.interferences
[vec
[i
]].second
.insert(ctx
.interferences
[vec
[j
]].second
.begin(), ctx
.interferences
[vec
[j
]].second
.end());
1338 ctx
.interferences
[vec
[j
]].second
.insert(ctx
.interferences
[vec
[i
]].second
.begin(), ctx
.interferences
[vec
[i
]].second
.end());
1340 bool reloaded
= ctx
.is_reloaded
[vec
[i
]] || ctx
.is_reloaded
[vec
[j
]];
1341 ctx
.is_reloaded
[vec
[i
]] = reloaded
;
1342 ctx
.is_reloaded
[vec
[j
]] = reloaded
;
1346 for (ASSERTED
uint32_t i
= 0; i
< ctx
.interferences
.size(); i
++)
1347 for (ASSERTED
uint32_t id
: ctx
.interferences
[i
].second
)
1350 /* for each spill slot, assign as many spill ids as possible */
1351 std::vector
<std::set
<uint32_t>> spill_slot_interferences
;
1352 unsigned slot_idx
= 0;
1355 /* assign sgpr spill slots */
1358 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1359 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1361 if (ctx
.interferences
[id
].first
.type() != RegType::sgpr
)
1364 /* check interferences */
1365 bool interferes
= false;
1366 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1367 if (i
== spill_slot_interferences
.size())
1368 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1369 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end() || i
/ ctx
.wave_size
!= slot_idx
/ ctx
.wave_size
) {
1379 /* we found a spill id which can be assigned to current spill slot */
1380 sgpr_slot
[id
] = slot_idx
;
1381 is_assigned
[id
] = true;
1382 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1383 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1385 /* add all affinities: there are no additional interferences */
1386 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1387 bool found_affinity
= false;
1388 for (uint32_t entry
: vec
) {
1390 found_affinity
= true;
1394 if (!found_affinity
)
1396 for (uint32_t entry
: vec
) {
1397 sgpr_slot
[entry
] = slot_idx
;
1398 is_assigned
[entry
] = true;
1405 unsigned sgpr_spill_slots
= spill_slot_interferences
.size();
1406 spill_slot_interferences
.clear();
1410 /* assign vgpr spill slots */
1413 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1414 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1416 if (ctx
.interferences
[id
].first
.type() != RegType::vgpr
)
1419 /* check interferences */
1420 bool interferes
= false;
1421 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1422 if (i
== spill_slot_interferences
.size())
1423 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1424 /* check for interference and ensure that vector regs are stored next to each other */
1425 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end()) {
1435 /* we found a spill id which can be assigned to current spill slot */
1436 vgpr_slot
[id
] = slot_idx
;
1437 is_assigned
[id
] = true;
1438 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1439 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1441 /* add all affinities: there are no additional interferences */
1442 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1443 bool found_affinity
= false;
1444 for (uint32_t entry
: vec
) {
1446 found_affinity
= true;
1450 if (!found_affinity
)
1452 for (uint32_t entry
: vec
) {
1453 vgpr_slot
[entry
] = slot_idx
;
1454 is_assigned
[entry
] = true;
1461 unsigned vgpr_spill_slots
= spill_slot_interferences
.size();
1463 for (unsigned id
= 0; id
< is_assigned
.size(); id
++)
1464 assert(is_assigned
[id
] || !ctx
.is_reloaded
[id
]);
1466 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1467 for (unsigned i
= 0; i
< vec
.size(); i
++) {
1468 for (unsigned j
= i
+ 1; j
< vec
.size(); j
++) {
1469 assert(is_assigned
[vec
[i
]] == is_assigned
[vec
[j
]]);
1470 if (!is_assigned
[vec
[i
]])
1472 assert(ctx
.is_reloaded
[vec
[i
]] == ctx
.is_reloaded
[vec
[j
]]);
1473 assert(ctx
.interferences
[vec
[i
]].first
.type() == ctx
.interferences
[vec
[j
]].first
.type());
1474 if (ctx
.interferences
[vec
[i
]].first
.type() == RegType::sgpr
)
1475 assert(sgpr_slot
[vec
[i
]] == sgpr_slot
[vec
[j
]]);
1477 assert(vgpr_slot
[vec
[i
]] == vgpr_slot
[vec
[j
]]);
1482 /* hope, we didn't mess up */
1483 std::vector
<Temp
> vgpr_spill_temps((sgpr_spill_slots
+ ctx
.wave_size
- 1) / ctx
.wave_size
);
1484 assert(vgpr_spill_temps
.size() <= spills_to_vgpr
);
1486 /* replace pseudo instructions with actual hardware instructions */
1487 Temp scratch_offset
= ctx
.program
->scratch_offset
, scratch_rsrc
= Temp();
1488 unsigned last_top_level_block_idx
= 0;
1489 std::vector
<bool> reload_in_loop(vgpr_spill_temps
.size());
1490 for (Block
& block
: ctx
.program
->blocks
) {
1492 /* after loops, we insert a user if there was a reload inside the loop */
1493 if (block
.loop_nest_depth
== 0) {
1495 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1496 if (reload_in_loop
[i
])
1500 if (end_vgprs
> 0) {
1501 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, end_vgprs
, 0)};
1503 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1504 if (reload_in_loop
[i
])
1505 destr
->operands
[k
++] = Operand(vgpr_spill_temps
[i
]);
1506 reload_in_loop
[i
] = false;
1508 /* find insertion point */
1509 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1510 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1512 block
.instructions
.insert(it
, std::move(destr
));
1516 if (block
.kind
& block_kind_top_level
&& !block
.linear_preds
.empty()) {
1517 last_top_level_block_idx
= block
.index
;
1519 /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
1520 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1521 if (vgpr_spill_temps
[i
] == Temp())
1524 bool can_destroy
= true;
1525 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[block
.linear_preds
[0]]) {
1527 if (sgpr_slot
.find(pair
.second
) != sgpr_slot
.end() &&
1528 sgpr_slot
[pair
.second
] / ctx
.wave_size
== i
) {
1529 can_destroy
= false;
1534 vgpr_spill_temps
[i
] = Temp();
1538 std::vector
<aco_ptr
<Instruction
>>::iterator it
;
1539 std::vector
<aco_ptr
<Instruction
>> instructions
;
1540 instructions
.reserve(block
.instructions
.size());
1541 Builder
bld(ctx
.program
, &instructions
);
1542 for (it
= block
.instructions
.begin(); it
!= block
.instructions
.end(); ++it
) {
1544 if ((*it
)->opcode
== aco_opcode::p_spill
) {
1545 uint32_t spill_id
= (*it
)->operands
[1].constantValue();
1547 if (!ctx
.is_reloaded
[spill_id
]) {
1548 /* never reloaded, so don't spill */
1549 } else if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1551 ctx
.program
->config
->spilled_vgprs
+= (*it
)->operands
[0].size();
1552 uint32_t spill_slot
= vgpr_slot
[spill_id
];
1553 bool add_offset_to_sgpr
= ctx
.program
->config
->scratch_bytes_per_wave
/ ctx
.program
->wave_size
+ vgpr_spill_slots
* 4 > 4096;
1554 unsigned base_offset
= add_offset_to_sgpr
? 0 : ctx
.program
->config
->scratch_bytes_per_wave
/ ctx
.program
->wave_size
;
1556 /* check if the scratch resource descriptor already exists */
1557 if (scratch_rsrc
== Temp()) {
1558 unsigned offset
= add_offset_to_sgpr
? ctx
.program
->config
->scratch_bytes_per_wave
: 0;
1559 scratch_rsrc
= load_scratch_resource(ctx
, scratch_offset
,
1560 last_top_level_block_idx
== block
.index
?
1561 instructions
: ctx
.program
->blocks
[last_top_level_block_idx
].instructions
,
1563 last_top_level_block_idx
== block
.index
);
1566 unsigned offset
= base_offset
+ spill_slot
* 4;
1567 aco_opcode opcode
= aco_opcode::buffer_store_dword
;
1568 assert((*it
)->operands
[0].isTemp());
1569 Temp temp
= (*it
)->operands
[0].getTemp();
1570 assert(temp
.type() == RegType::vgpr
&& !temp
.is_linear());
1571 if (temp
.size() > 1) {
1572 Instruction
* split
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_split_vector
, Format::PSEUDO
, 1, temp
.size())};
1573 split
->operands
[0] = Operand(temp
);
1574 for (unsigned i
= 0; i
< temp
.size(); i
++)
1575 split
->definitions
[i
] = bld
.def(v1
);
1577 for (unsigned i
= 0; i
< temp
.size(); i
++)
1578 bld
.mubuf(opcode
, scratch_rsrc
, Operand(), scratch_offset
, split
->definitions
[i
].getTemp(), offset
+ i
* 4, false);
1580 bld
.mubuf(opcode
, scratch_rsrc
, Operand(), scratch_offset
, temp
, offset
, false);
1582 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1583 ctx
.program
->config
->spilled_sgprs
+= (*it
)->operands
[0].size();
1585 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1587 /* check if the linear vgpr already exists */
1588 if (vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
] == Temp()) {
1589 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1590 vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
] = linear_vgpr
;
1591 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1592 create
->definitions
[0] = Definition(linear_vgpr
);
1593 /* find the right place to insert this definition */
1594 if (last_top_level_block_idx
== block
.index
) {
1595 /* insert right before the current instruction */
1596 instructions
.emplace_back(std::move(create
));
1598 assert(last_top_level_block_idx
< block
.index
);
1599 /* insert before the branch at last top level block */
1600 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1601 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1605 /* spill sgpr: just add the vgpr temp to operands */
1606 Pseudo_instruction
* spill
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 3, 0);
1607 spill
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
]);
1608 spill
->operands
[1] = Operand(spill_slot
% ctx
.wave_size
);
1609 spill
->operands
[2] = (*it
)->operands
[0];
1610 instructions
.emplace_back(aco_ptr
<Instruction
>(spill
));
1612 unreachable("No spill slot assigned for spill id");
1615 } else if ((*it
)->opcode
== aco_opcode::p_reload
) {
1616 uint32_t spill_id
= (*it
)->operands
[0].constantValue();
1617 assert(ctx
.is_reloaded
[spill_id
]);
1619 if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1621 uint32_t spill_slot
= vgpr_slot
[spill_id
];
1622 bool add_offset_to_sgpr
= ctx
.program
->config
->scratch_bytes_per_wave
/ ctx
.program
->wave_size
+ vgpr_spill_slots
* 4 > 4096;
1623 unsigned base_offset
= add_offset_to_sgpr
? 0 : ctx
.program
->config
->scratch_bytes_per_wave
/ ctx
.program
->wave_size
;
1625 /* check if the scratch resource descriptor already exists */
1626 if (scratch_rsrc
== Temp()) {
1627 unsigned offset
= add_offset_to_sgpr
? ctx
.program
->config
->scratch_bytes_per_wave
: 0;
1628 scratch_rsrc
= load_scratch_resource(ctx
, scratch_offset
,
1629 last_top_level_block_idx
== block
.index
?
1630 instructions
: ctx
.program
->blocks
[last_top_level_block_idx
].instructions
,
1632 last_top_level_block_idx
== block
.index
);
1635 unsigned offset
= base_offset
+ spill_slot
* 4;
1636 aco_opcode opcode
= aco_opcode::buffer_load_dword
;
1637 Definition def
= (*it
)->definitions
[0];
1638 if (def
.size() > 1) {
1639 Instruction
* vec
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_create_vector
, Format::PSEUDO
, def
.size(), 1)};
1640 vec
->definitions
[0] = def
;
1641 for (unsigned i
= 0; i
< def
.size(); i
++) {
1642 Temp tmp
= bld
.tmp(v1
);
1643 vec
->operands
[i
] = Operand(tmp
);
1644 bld
.mubuf(opcode
, Definition(tmp
), scratch_rsrc
, Operand(), scratch_offset
, offset
+ i
* 4, false);
1648 bld
.mubuf(opcode
, def
, scratch_rsrc
, Operand(), scratch_offset
, offset
, false);
1650 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1651 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1652 reload_in_loop
[spill_slot
/ ctx
.wave_size
] = block
.loop_nest_depth
> 0;
1654 /* check if the linear vgpr already exists */
1655 if (vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
] == Temp()) {
1656 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1657 vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
] = linear_vgpr
;
1658 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1659 create
->definitions
[0] = Definition(linear_vgpr
);
1660 /* find the right place to insert this definition */
1661 if (last_top_level_block_idx
== block
.index
) {
1662 /* insert right before the current instruction */
1663 instructions
.emplace_back(std::move(create
));
1665 assert(last_top_level_block_idx
< block
.index
);
1666 /* insert before the branch at last top level block */
1667 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1668 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1672 /* reload sgpr: just add the vgpr temp to operands */
1673 Pseudo_instruction
* reload
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 2, 1);
1674 reload
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ ctx
.wave_size
]);
1675 reload
->operands
[1] = Operand(spill_slot
% ctx
.wave_size
);
1676 reload
->definitions
[0] = (*it
)->definitions
[0];
1677 instructions
.emplace_back(aco_ptr
<Instruction
>(reload
));
1679 unreachable("No spill slot assigned for spill id");
1681 } else if (!ctx
.remat_used
.count(it
->get()) || ctx
.remat_used
[it
->get()]) {
1682 instructions
.emplace_back(std::move(*it
));
1686 block
.instructions
= std::move(instructions
);
1689 /* update required scratch memory */
1690 ctx
.program
->config
->scratch_bytes_per_wave
+= align(vgpr_spill_slots
* 4 * ctx
.program
->wave_size
, 1024);
1692 /* SSA elimination inserts copies for logical phis right before p_logical_end
1693 * So if a linear vgpr is used between that p_logical_end and the branch,
1694 * we need to ensure logical phis don't choose a definition which aliases
1696 * TODO: Moving the spills and reloads to before p_logical_end might produce
1697 * slightly better code. */
1698 for (Block
& block
: ctx
.program
->blocks
) {
1699 /* loops exits are already handled */
1700 if (block
.logical_preds
.size() <= 1)
1703 bool has_logical_phis
= false;
1704 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
1705 if (instr
->opcode
== aco_opcode::p_phi
) {
1706 has_logical_phis
= true;
1708 } else if (instr
->opcode
!= aco_opcode::p_linear_phi
) {
1712 if (!has_logical_phis
)
1715 std::set
<Temp
> vgprs
;
1716 for (unsigned pred_idx
: block
.logical_preds
) {
1717 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
1718 for (int i
= pred
.instructions
.size() - 1; i
>= 0; i
--) {
1719 aco_ptr
<Instruction
>& pred_instr
= pred
.instructions
[i
];
1720 if (pred_instr
->opcode
== aco_opcode::p_logical_end
) {
1722 } else if (pred_instr
->opcode
== aco_opcode::p_spill
||
1723 pred_instr
->opcode
== aco_opcode::p_reload
) {
1724 vgprs
.insert(pred_instr
->operands
[0].getTemp());
1731 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, vgprs
.size(), 0)};
1733 for (Temp tmp
: vgprs
) {
1734 destr
->operands
[k
++] = Operand(tmp
);
1736 /* find insertion point */
1737 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1738 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1740 block
.instructions
.insert(it
, std::move(destr
));
1744 } /* end namespace */
1747 void spill(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
)
1749 program
->config
->spilled_vgprs
= 0;
1750 program
->config
->spilled_sgprs
= 0;
1752 /* no spilling when register pressure is low enough */
1753 if (program
->num_waves
> 0)
1756 /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1757 lower_to_cssa(program
, live_vars
, options
);
1759 /* calculate target register demand */
1760 RegisterDemand register_target
= program
->max_reg_demand
;
1761 if (register_target
.sgpr
> program
->sgpr_limit
)
1762 register_target
.vgpr
+= (register_target
.sgpr
- program
->sgpr_limit
+ program
->wave_size
- 1 + 32) / program
->wave_size
;
1763 register_target
.sgpr
= program
->sgpr_limit
;
1765 if (register_target
.vgpr
> program
->vgpr_limit
)
1766 register_target
.sgpr
= program
->sgpr_limit
- 5;
1767 int spills_to_vgpr
= (program
->max_reg_demand
.sgpr
- register_target
.sgpr
+ program
->wave_size
- 1 + 32) / program
->wave_size
;
1768 register_target
.vgpr
= program
->vgpr_limit
- spills_to_vgpr
;
1770 /* initialize ctx */
1771 spill_ctx
ctx(register_target
, program
, live_vars
.register_demand
);
1772 compute_global_next_uses(ctx
, live_vars
.live_out
);
1773 get_rematerialize_info(ctx
);
1775 /* create spills and reloads */
1776 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++)
1777 spill_block(ctx
, i
);
1779 /* assign spill slots and DCE rematerialized code */
1780 assign_spill_slots(ctx
, spills_to_vgpr
);
1782 /* update live variable information */
1783 live_vars
= live_var_analysis(program
, options
);
1785 assert(program
->num_waves
> 0);