2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 #include "vulkan/radv_shader.h"
33 * Implements the spilling algorithm on SSA-form from
34 * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
35 * by Matthias Braun and Sebastian Hack.
47 RegisterDemand target_pressure
;
49 std::vector
<std::vector
<RegisterDemand
>> register_demand
;
50 std::vector
<std::map
<Temp
, Temp
>> renames
;
51 std::vector
<std::map
<Temp
, uint32_t>> spills_entry
;
52 std::vector
<std::map
<Temp
, uint32_t>> spills_exit
;
53 std::vector
<bool> processed
;
54 std::stack
<Block
*> loop_header
;
55 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_start
;
56 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_end
;
57 std::vector
<std::pair
<RegClass
, std::set
<uint32_t>>> interferences
;
58 std::vector
<std::vector
<uint32_t>> affinities
;
59 std::vector
<bool> is_reloaded
;
60 std::map
<Temp
, remat_info
> remat
;
61 std::map
<Instruction
*, bool> remat_used
;
63 spill_ctx(const RegisterDemand target_pressure
, Program
* program
,
64 std::vector
<std::vector
<RegisterDemand
>> register_demand
)
65 : target_pressure(target_pressure
), program(program
),
66 register_demand(register_demand
), renames(program
->blocks
.size()),
67 spills_entry(program
->blocks
.size()), spills_exit(program
->blocks
.size()),
68 processed(program
->blocks
.size(), false) {}
70 void add_affinity(uint32_t first
, uint32_t second
)
72 unsigned found_first
= affinities
.size();
73 unsigned found_second
= affinities
.size();
74 for (unsigned i
= 0; i
< affinities
.size(); i
++) {
75 std::vector
<uint32_t>& vec
= affinities
[i
];
76 for (uint32_t entry
: vec
) {
79 else if (entry
== second
)
83 if (found_first
== affinities
.size() && found_second
== affinities
.size()) {
84 affinities
.emplace_back(std::vector
<uint32_t>({first
, second
}));
85 } else if (found_first
< affinities
.size() && found_second
== affinities
.size()) {
86 affinities
[found_first
].push_back(second
);
87 } else if (found_second
< affinities
.size() && found_first
== affinities
.size()) {
88 affinities
[found_second
].push_back(first
);
89 } else if (found_first
!= found_second
) {
90 /* merge second into first */
91 affinities
[found_first
].insert(affinities
[found_first
].end(), affinities
[found_second
].begin(), affinities
[found_second
].end());
92 affinities
.erase(std::next(affinities
.begin(), found_second
));
94 assert(found_first
== found_second
);
98 uint32_t allocate_spill_id(RegClass rc
)
100 interferences
.emplace_back(rc
, std::set
<uint32_t>());
101 is_reloaded
.push_back(false);
102 return next_spill_id
++;
105 uint32_t next_spill_id
= 0;
108 int32_t get_dominator(int idx_a
, int idx_b
, Program
* program
, bool is_linear
)
116 while (idx_a
!= idx_b
) {
118 idx_a
= program
->blocks
[idx_a
].linear_idom
;
120 idx_b
= program
->blocks
[idx_b
].linear_idom
;
123 while (idx_a
!= idx_b
) {
125 idx_a
= program
->blocks
[idx_a
].logical_idom
;
127 idx_b
= program
->blocks
[idx_b
].logical_idom
;
134 void next_uses_per_block(spill_ctx
& ctx
, unsigned block_idx
, std::set
<uint32_t>& worklist
)
136 Block
* block
= &ctx
.program
->blocks
[block_idx
];
137 std::map
<Temp
, std::pair
<uint32_t, uint32_t>> next_uses
= ctx
.next_use_distances_end
[block_idx
];
139 /* to compute the next use distance at the beginning of the block, we have to add the block's size */
140 for (std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= next_uses
.begin(); it
!= next_uses
.end(); ++it
)
141 it
->second
.second
= it
->second
.second
+ block
->instructions
.size();
143 int idx
= block
->instructions
.size() - 1;
145 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
147 if (instr
->opcode
== aco_opcode::p_linear_phi
||
148 instr
->opcode
== aco_opcode::p_phi
)
151 for (const Definition
& def
: instr
->definitions
) {
153 next_uses
.erase(def
.getTemp());
156 for (const Operand
& op
: instr
->operands
) {
158 if (op
.isFixed() && op
.physReg() == exec
)
160 if (op
.regClass().type() == RegType::vgpr
&& op
.regClass().is_linear())
163 next_uses
[op
.getTemp()] = {block_idx
, idx
};
168 assert(block_idx
!= 0 || next_uses
.empty());
169 ctx
.next_use_distances_start
[block_idx
] = next_uses
;
171 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
172 assert(instr
->opcode
== aco_opcode::p_linear_phi
|| instr
->opcode
== aco_opcode::p_phi
);
174 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
175 unsigned pred_idx
= instr
->opcode
== aco_opcode::p_phi
?
176 block
->logical_preds
[i
] :
177 block
->linear_preds
[i
];
178 if (instr
->operands
[i
].isTemp()) {
179 if (instr
->operands
[i
].getTemp() == ctx
.program
->blocks
[pred_idx
].live_out_exec
)
181 if (ctx
.next_use_distances_end
[pred_idx
].find(instr
->operands
[i
].getTemp()) == ctx
.next_use_distances_end
[pred_idx
].end() ||
182 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] != std::pair
<uint32_t, uint32_t>{block_idx
, 0})
183 worklist
.insert(pred_idx
);
184 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] = {block_idx
, 0};
187 next_uses
.erase(instr
->definitions
[0].getTemp());
191 /* all remaining live vars must be live-out at the predecessors */
192 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: next_uses
) {
193 Temp temp
= pair
.first
;
194 uint32_t distance
= pair
.second
.second
;
195 uint32_t dom
= pair
.second
.first
;
196 std::vector
<unsigned>& preds
= temp
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
197 for (unsigned pred_idx
: preds
) {
198 if (temp
== ctx
.program
->blocks
[pred_idx
].live_out_exec
)
200 if (ctx
.program
->blocks
[pred_idx
].loop_nest_depth
> block
->loop_nest_depth
)
202 if (ctx
.next_use_distances_end
[pred_idx
].find(temp
) != ctx
.next_use_distances_end
[pred_idx
].end()) {
203 dom
= get_dominator(dom
, ctx
.next_use_distances_end
[pred_idx
][temp
].first
, ctx
.program
, temp
.is_linear());
204 distance
= std::min(ctx
.next_use_distances_end
[pred_idx
][temp
].second
, distance
);
206 if (ctx
.next_use_distances_end
[pred_idx
][temp
] != std::pair
<uint32_t, uint32_t>{dom
, distance
})
207 worklist
.insert(pred_idx
);
208 ctx
.next_use_distances_end
[pred_idx
][temp
] = {dom
, distance
};
214 void compute_global_next_uses(spill_ctx
& ctx
, std::vector
<std::set
<Temp
>>& live_out
)
216 ctx
.next_use_distances_start
.resize(ctx
.program
->blocks
.size());
217 ctx
.next_use_distances_end
.resize(ctx
.program
->blocks
.size());
218 std::set
<uint32_t> worklist
;
219 for (Block
& block
: ctx
.program
->blocks
)
220 worklist
.insert(block
.index
);
222 while (!worklist
.empty()) {
223 std::set
<unsigned>::reverse_iterator b_it
= worklist
.rbegin();
224 unsigned block_idx
= *b_it
;
225 worklist
.erase(block_idx
);
226 next_uses_per_block(ctx
, block_idx
, worklist
);
230 bool should_rematerialize(aco_ptr
<Instruction
>& instr
)
232 /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
233 if (instr
->format
!= Format::VOP1
&& instr
->format
!= Format::SOP1
&& instr
->format
!= Format::PSEUDO
)
235 /* TODO: pseudo-instruction rematerialization is only supported for p_create_vector */
236 if (instr
->format
== Format::PSEUDO
&& instr
->opcode
!= aco_opcode::p_create_vector
)
239 for (const Operand
& op
: instr
->operands
) {
240 /* TODO: rematerialization using temporaries isn't yet supported */
245 /* TODO: rematerialization with multiple definitions isn't yet supported */
246 if (instr
->definitions
.size() > 1)
252 aco_ptr
<Instruction
> do_reload(spill_ctx
& ctx
, Temp tmp
, Temp new_name
, uint32_t spill_id
)
254 std::map
<Temp
, remat_info
>::iterator remat
= ctx
.remat
.find(tmp
);
255 if (remat
!= ctx
.remat
.end()) {
256 Instruction
*instr
= remat
->second
.instr
;
257 assert((instr
->format
== Format::VOP1
|| instr
->format
== Format::SOP1
|| instr
->format
== Format::PSEUDO
) && "unsupported");
258 assert((instr
->format
!= Format::PSEUDO
|| instr
->opcode
== aco_opcode::p_create_vector
) && "unsupported");
259 assert(instr
->definitions
.size() == 1 && "unsupported");
261 aco_ptr
<Instruction
> res
;
262 if (instr
->format
== Format::VOP1
) {
263 res
.reset(create_instruction
<VOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
264 } else if (instr
->format
== Format::SOP1
) {
265 res
.reset(create_instruction
<SOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
266 } else if (instr
->format
== Format::PSEUDO
) {
267 res
.reset(create_instruction
<Instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
269 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
270 res
->operands
[i
] = instr
->operands
[i
];
271 if (instr
->operands
[i
].isTemp()) {
272 assert(false && "unsupported");
273 if (ctx
.remat
.count(instr
->operands
[i
].getTemp()))
274 ctx
.remat_used
[ctx
.remat
[instr
->operands
[i
].getTemp()].instr
] = true;
277 res
->definitions
[0] = Definition(new_name
);
280 aco_ptr
<Pseudo_instruction
> reload
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 1, 1)};
281 reload
->operands
[0] = Operand(spill_id
);
282 reload
->definitions
[0] = Definition(new_name
);
283 ctx
.is_reloaded
[spill_id
] = true;
288 void get_rematerialize_info(spill_ctx
& ctx
)
290 for (Block
& block
: ctx
.program
->blocks
) {
291 bool logical
= false;
292 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
293 if (instr
->opcode
== aco_opcode::p_logical_start
)
295 else if (instr
->opcode
== aco_opcode::p_logical_end
)
297 if (logical
&& should_rematerialize(instr
)) {
298 for (const Definition
& def
: instr
->definitions
) {
300 ctx
.remat
[def
.getTemp()] = (remat_info
){instr
.get()};
301 ctx
.remat_used
[instr
.get()] = false;
309 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(spill_ctx
& ctx
, Block
* block
)
311 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(block
->instructions
.size());
313 std::map
<Temp
, uint32_t> next_uses
;
314 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block
->index
])
315 next_uses
[pair
.first
] = pair
.second
.second
+ block
->instructions
.size();
317 for (int idx
= block
->instructions
.size() - 1; idx
>= 0; idx
--) {
318 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
321 if (instr
->opcode
== aco_opcode::p_phi
|| instr
->opcode
== aco_opcode::p_linear_phi
)
324 for (const Operand
& op
: instr
->operands
) {
325 if (op
.isFixed() && op
.physReg() == exec
)
327 if (op
.regClass().type() == RegType::vgpr
&& op
.regClass().is_linear())
330 next_uses
[op
.getTemp()] = idx
;
332 for (const Definition
& def
: instr
->definitions
) {
334 next_uses
.erase(def
.getTemp());
336 local_next_uses
[idx
] = next_uses
;
338 return local_next_uses
;
342 RegisterDemand
init_live_in_vars(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
344 RegisterDemand spilled_registers
;
346 /* first block, nothing was spilled before */
350 /* loop header block */
351 if (block
->loop_nest_depth
> ctx
.program
->blocks
[block_idx
- 1].loop_nest_depth
) {
352 assert(block
->linear_preds
[0] == block_idx
- 1);
353 assert(block
->logical_preds
[0] == block_idx
- 1);
355 /* create new loop_info */
356 ctx
.loop_header
.emplace(block
);
358 /* check how many live-through variables should be spilled */
359 RegisterDemand new_demand
;
360 unsigned i
= block_idx
;
361 while (ctx
.program
->blocks
[i
].loop_nest_depth
>= block
->loop_nest_depth
) {
362 assert(ctx
.program
->blocks
.size() > i
);
363 new_demand
.update(ctx
.program
->blocks
[i
].register_demand
);
366 unsigned loop_end
= i
;
368 /* select live-through vgpr variables */
369 while (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
370 unsigned distance
= 0;
372 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
373 if (pair
.first
.type() == RegType::vgpr
&&
374 pair
.second
.first
>= loop_end
&&
375 pair
.second
.second
> distance
&&
376 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
377 to_spill
= pair
.first
;
378 distance
= pair
.second
.second
;
385 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
386 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
388 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
391 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
392 spilled_registers
.vgpr
+= to_spill
.size();
395 /* select live-through sgpr variables */
396 while (new_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
397 unsigned distance
= 0;
399 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
400 if (pair
.first
.type() == RegType::sgpr
&&
401 pair
.second
.first
>= loop_end
&&
402 pair
.second
.second
> distance
&&
403 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
404 to_spill
= pair
.first
;
405 distance
= pair
.second
.second
;
412 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
413 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
415 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
418 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
419 spilled_registers
.sgpr
+= to_spill
.size();
425 if (!RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
))
426 return spilled_registers
;
428 /* if reg pressure is too high at beginning of loop, add variables with furthest use */
430 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
|| block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
)
433 assert(idx
!= 0 && "loop without phis: TODO");
435 RegisterDemand reg_pressure
= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
436 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
437 unsigned distance
= 0;
439 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
440 if (pair
.first
.type() == RegType::sgpr
&&
441 pair
.second
.second
> distance
&&
442 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
443 to_spill
= pair
.first
;
444 distance
= pair
.second
.second
;
447 assert(distance
!= 0);
449 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
450 spilled_registers
.sgpr
+= to_spill
.size();
451 reg_pressure
.sgpr
-= to_spill
.size();
453 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
454 unsigned distance
= 0;
456 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
457 if (pair
.first
.type() == RegType::vgpr
&&
458 pair
.second
.second
> distance
&&
459 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
460 to_spill
= pair
.first
;
461 distance
= pair
.second
.second
;
464 assert(distance
!= 0);
465 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
466 spilled_registers
.vgpr
+= to_spill
.size();
467 reg_pressure
.vgpr
-= to_spill
.size();
470 return spilled_registers
;
474 if (block
->linear_preds
.size() == 1 && !(block
->kind
& block_kind_loop_exit
)) {
475 /* keep variables spilled if they are alive and not used in the current block */
476 unsigned pred_idx
= block
->linear_preds
[0];
477 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
478 if (pair
.first
.type() == RegType::sgpr
&&
479 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
480 ctx
.next_use_distances_start
[block_idx
][pair
.first
].second
> block_idx
) {
481 ctx
.spills_entry
[block_idx
].insert(pair
);
482 spilled_registers
.sgpr
+= pair
.first
.size();
485 if (block
->logical_preds
.size() == 1) {
486 pred_idx
= block
->logical_preds
[0];
487 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
488 if (pair
.first
.type() == RegType::vgpr
&&
489 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
490 ctx
.next_use_distances_end
[pred_idx
][pair
.first
].second
> block_idx
) {
491 ctx
.spills_entry
[block_idx
].insert(pair
);
492 spilled_registers
.vgpr
+= pair
.first
.size();
497 /* if register demand is still too high, we just keep all spilled live vars and process the block */
498 if (block
->register_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
499 pred_idx
= block
->linear_preds
[0];
500 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
501 if (pair
.first
.type() == RegType::sgpr
&&
502 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
503 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
504 spilled_registers
.sgpr
+= pair
.first
.size();
508 if (block
->register_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
&& block
->logical_preds
.size() == 1) {
509 pred_idx
= block
->logical_preds
[0];
510 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
511 if (pair
.first
.type() == RegType::vgpr
&&
512 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
513 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
514 spilled_registers
.vgpr
+= pair
.first
.size();
519 return spilled_registers
;
522 /* else: merge block */
523 std::set
<Temp
> partial_spills
;
525 /* keep variables spilled on all incoming paths */
526 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
527 std::vector
<unsigned>& preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
528 /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload it.
529 * Otherwise, if any predecessor reloads it, ensure it's reloaded on all other predecessors.
530 * The idea is that it's better in practice to rematerialize redundantly than to create lots of phis. */
531 /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db doesn't seem to exercise this path much) */
532 bool remat
= ctx
.remat
.count(pair
.first
);
534 uint32_t spill_id
= 0;
535 for (unsigned pred_idx
: preds
) {
536 /* variable is not even live at the predecessor: probably from a phi */
537 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end()) {
541 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
545 partial_spills
.insert(pair
.first
);
546 /* it might be that on one incoming path, the variable has a different spill_id, but add_couple_code() will take care of that. */
547 spill_id
= ctx
.spills_exit
[pred_idx
][pair
.first
];
553 ctx
.spills_entry
[block_idx
][pair
.first
] = spill_id
;
554 partial_spills
.erase(pair
.first
);
555 spilled_registers
+= pair
.first
;
561 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
||
562 block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
) {
563 aco_ptr
<Instruction
>& phi
= block
->instructions
[idx
];
564 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
567 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
568 if (phi
->operands
[i
].isUndefined())
570 assert(phi
->operands
[i
].isTemp());
571 if (ctx
.spills_exit
[preds
[i
]].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[preds
[i
]].end())
574 partial_spills
.insert(phi
->definitions
[0].getTemp());
577 ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()] = ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
578 partial_spills
.erase(phi
->definitions
[0].getTemp());
579 spilled_registers
+= phi
->definitions
[0].getTemp();
585 /* if reg pressure at first instruction is still too high, add partially spilled variables */
586 RegisterDemand reg_pressure
;
588 for (const Definition
& def
: block
->instructions
[idx
]->definitions
) {
590 reg_pressure
-= def
.getTemp();
593 for (const Operand
& op
: block
->instructions
[idx
]->operands
) {
594 if (op
.isTemp() && op
.isFirstKill()) {
595 reg_pressure
+= op
.getTemp();
601 reg_pressure
+= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
603 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
604 assert(!partial_spills
.empty());
606 std::set
<Temp
>::iterator it
= partial_spills
.begin();
608 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
609 while (it
!= partial_spills
.end()) {
610 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
612 if (it
->type() == RegType::sgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
613 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
618 assert(distance
!= 0);
620 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
621 partial_spills
.erase(to_spill
);
622 spilled_registers
.sgpr
+= to_spill
.size();
623 reg_pressure
.sgpr
-= to_spill
.size();
626 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
627 assert(!partial_spills
.empty());
629 std::set
<Temp
>::iterator it
= partial_spills
.begin();
631 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
632 while (it
!= partial_spills
.end()) {
633 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
635 if (it
->type() == RegType::vgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
636 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
641 assert(distance
!= 0);
643 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
644 partial_spills
.erase(to_spill
);
645 spilled_registers
.vgpr
+= to_spill
.size();
646 reg_pressure
.vgpr
-= to_spill
.size();
649 return spilled_registers
;
653 void add_coupling_code(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
655 /* no coupling code necessary */
656 if (block
->linear_preds
.size() == 0)
659 std::vector
<aco_ptr
<Instruction
>> instructions
;
660 /* branch block: TODO take other branch into consideration */
661 if (block
->linear_preds
.size() == 1 && !(block
->kind
& block_kind_loop_exit
)) {
662 assert(ctx
.processed
[block
->linear_preds
[0]]);
663 assert(ctx
.register_demand
[block_idx
].size() == block
->instructions
.size());
664 std::vector
<RegisterDemand
> reg_demand
;
665 unsigned insert_idx
= 0;
666 unsigned pred_idx
= block
->linear_preds
[0];
668 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
669 if (!live
.first
.is_linear())
672 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
675 /* in register at end of predecessor */
676 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
677 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
678 if (it
!= ctx
.renames
[pred_idx
].end())
679 ctx
.renames
[block_idx
].insert(*it
);
683 /* variable is spilled at predecessor and live at current block: create reload instruction */
684 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
685 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
686 instructions
.emplace_back(std::move(reload
));
687 reg_demand
.push_back(RegisterDemand());
688 ctx
.renames
[block_idx
][live
.first
] = new_name
;
691 if (block
->logical_preds
.size() == 1) {
693 assert(insert_idx
< block
->instructions
.size());
694 instructions
.emplace_back(std::move(block
->instructions
[insert_idx
]));
695 reg_demand
.push_back(ctx
.register_demand
[block_idx
][insert_idx
]);
697 } while (instructions
.back()->opcode
!= aco_opcode::p_logical_start
);
699 unsigned pred_idx
= block
->logical_preds
[0];
700 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
701 if (live
.first
.is_linear())
704 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
707 /* in register at end of predecessor */
708 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
709 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
710 if (it
!= ctx
.renames
[pred_idx
].end())
711 ctx
.renames
[block_idx
].insert(*it
);
715 /* variable is spilled at predecessor and live at current block: create reload instruction */
716 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
717 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
718 instructions
.emplace_back(std::move(reload
));
719 reg_demand
.emplace_back(reg_demand
.back());
720 ctx
.renames
[block_idx
][live
.first
] = new_name
;
724 /* combine new reload instructions with original block */
725 if (!instructions
.empty()) {
726 reg_demand
.insert(reg_demand
.end(), std::next(ctx
.register_demand
[block
->index
].begin(), insert_idx
),
727 ctx
.register_demand
[block
->index
].end());
728 ctx
.register_demand
[block_idx
] = std::move(reg_demand
);
729 instructions
.insert(instructions
.end(),
730 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(std::next(block
->instructions
.begin(), insert_idx
)),
731 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
732 block
->instructions
= std::move(instructions
);
737 /* loop header and merge blocks: check if all (linear) predecessors have been processed */
738 for (ASSERTED
unsigned pred
: block
->linear_preds
)
739 assert(ctx
.processed
[pred
]);
741 /* iterate the phi nodes for which operands to spill at the predecessor */
742 for (aco_ptr
<Instruction
>& phi
: block
->instructions
) {
743 if (phi
->opcode
!= aco_opcode::p_phi
&&
744 phi
->opcode
!= aco_opcode::p_linear_phi
)
747 /* if the phi is not spilled, add to instructions */
748 if (ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end()) {
749 instructions
.emplace_back(std::move(phi
));
753 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
754 uint32_t def_spill_id
= ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()];
756 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
757 if (phi
->operands
[i
].isUndefined())
760 unsigned pred_idx
= preds
[i
];
761 assert(phi
->operands
[i
].isTemp() && phi
->operands
[i
].isKill());
762 Temp var
= phi
->operands
[i
].getTemp();
764 /* build interferences between the phi def and all spilled variables at the predecessor blocks */
765 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
766 if (var
== pair
.first
)
768 ctx
.interferences
[def_spill_id
].second
.emplace(pair
.second
);
769 ctx
.interferences
[pair
.second
].second
.emplace(def_spill_id
);
772 /* check if variable is already spilled at predecessor */
773 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(var
);
774 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
775 if (spilled
->second
!= def_spill_id
)
776 ctx
.add_affinity(def_spill_id
, spilled
->second
);
780 /* rename if necessary */
781 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
782 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
783 var
= rename_it
->second
;
784 ctx
.renames
[pred_idx
].erase(rename_it
);
787 uint32_t spill_id
= ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
788 ctx
.add_affinity(def_spill_id
, spill_id
);
789 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
790 spill
->operands
[0] = Operand(var
);
791 spill
->operands
[1] = Operand(spill_id
);
792 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
793 unsigned idx
= pred
.instructions
.size();
797 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
798 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
799 pred
.instructions
.insert(it
, std::move(spill
));
800 ctx
.spills_exit
[pred_idx
][phi
->operands
[i
].getTemp()] = spill_id
;
803 /* remove phi from instructions */
807 /* iterate all (other) spilled variables for which to spill at the predecessor */
808 // TODO: would be better to have them sorted: first vgprs and first with longest distance
809 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_entry
[block_idx
]) {
810 std::vector
<unsigned> preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
812 for (unsigned pred_idx
: preds
) {
813 /* variable is already spilled at predecessor */
814 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(pair
.first
);
815 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
816 if (spilled
->second
!= pair
.second
)
817 ctx
.add_affinity(pair
.second
, spilled
->second
);
821 /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */
822 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
825 /* add interferences between spilled variable and predecessors exit spills */
826 for (std::pair
<Temp
, uint32_t> exit_spill
: ctx
.spills_exit
[pred_idx
]) {
827 if (exit_spill
.first
== pair
.first
)
829 ctx
.interferences
[exit_spill
.second
].second
.emplace(pair
.second
);
830 ctx
.interferences
[pair
.second
].second
.emplace(exit_spill
.second
);
833 /* variable is in register at predecessor and has to be spilled */
834 /* rename if necessary */
835 Temp var
= pair
.first
;
836 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
837 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
838 var
= rename_it
->second
;
839 ctx
.renames
[pred_idx
].erase(rename_it
);
842 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
843 spill
->operands
[0] = Operand(var
);
844 spill
->operands
[1] = Operand(pair
.second
);
845 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
846 unsigned idx
= pred
.instructions
.size();
850 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
851 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
852 pred
.instructions
.insert(it
, std::move(spill
));
853 ctx
.spills_exit
[pred
.index
][pair
.first
] = pair
.second
;
857 /* iterate phis for which operands to reload */
858 for (aco_ptr
<Instruction
>& phi
: instructions
) {
859 assert(phi
->opcode
== aco_opcode::p_phi
|| phi
->opcode
== aco_opcode::p_linear_phi
);
860 assert(ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end());
862 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
863 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
864 if (!phi
->operands
[i
].isTemp())
866 unsigned pred_idx
= preds
[i
];
869 if (ctx
.spills_exit
[pred_idx
].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[pred_idx
].end()) {
870 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(phi
->operands
[i
].getTemp());
871 if (it
!= ctx
.renames
[pred_idx
].end())
872 phi
->operands
[i
].setTemp(it
->second
);
876 Temp tmp
= phi
->operands
[i
].getTemp();
878 /* reload phi operand at end of predecessor block */
879 Temp new_name
= {ctx
.program
->allocateId(), tmp
.regClass()};
880 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
881 unsigned idx
= pred
.instructions
.size();
885 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
886 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
888 aco_ptr
<Instruction
> reload
= do_reload(ctx
, tmp
, new_name
, ctx
.spills_exit
[pred_idx
][tmp
]);
889 pred
.instructions
.insert(it
, std::move(reload
));
891 ctx
.spills_exit
[pred_idx
].erase(tmp
);
892 ctx
.renames
[pred_idx
][tmp
] = new_name
;
893 phi
->operands
[i
].setTemp(new_name
);
897 /* iterate live variables for which to reload */
898 // TODO: reload at current block if variable is spilled on all predecessors
899 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
900 /* skip spilled variables */
901 if (ctx
.spills_entry
[block_idx
].find(pair
.first
) != ctx
.spills_entry
[block_idx
].end())
903 std::vector
<unsigned> preds
= pair
.first
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
905 /* variable is dead at predecessor, it must be from a phi */
906 bool is_dead
= false;
907 for (unsigned pred_idx
: preds
) {
908 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
913 for (unsigned pred_idx
: preds
) {
914 /* the variable is not spilled at the predecessor */
915 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end())
918 /* variable is spilled at predecessor and has to be reloaded */
919 Temp new_name
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
920 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
921 unsigned idx
= pred
.instructions
.size();
925 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
926 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
928 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.first
, new_name
, ctx
.spills_exit
[pred
.index
][pair
.first
]);
929 pred
.instructions
.insert(it
, std::move(reload
));
931 ctx
.spills_exit
[pred
.index
].erase(pair
.first
);
932 ctx
.renames
[pred
.index
][pair
.first
] = new_name
;
935 /* check if we have to create a new phi for this variable */
936 Temp rename
= Temp();
938 for (unsigned pred_idx
: preds
) {
939 if (ctx
.renames
[pred_idx
].find(pair
.first
) == ctx
.renames
[pred_idx
].end()) {
940 if (rename
== Temp())
943 is_same
= rename
== pair
.first
;
945 if (rename
== Temp())
946 rename
= ctx
.renames
[pred_idx
][pair
.first
];
948 is_same
= rename
== ctx
.renames
[pred_idx
][pair
.first
];
956 /* the variable was renamed differently in the predecessors: we have to create a phi */
957 aco_opcode opcode
= pair
.first
.is_linear() ? aco_opcode::p_linear_phi
: aco_opcode::p_phi
;
958 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(opcode
, Format::PSEUDO
, preds
.size(), 1)};
959 rename
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
960 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
962 if (ctx
.renames
[preds
[i
]].find(pair
.first
) != ctx
.renames
[preds
[i
]].end())
963 tmp
= ctx
.renames
[preds
[i
]][pair
.first
];
964 else if (preds
[i
] >= block_idx
)
968 phi
->operands
[i
] = Operand(tmp
);
970 phi
->definitions
[0] = Definition(rename
);
971 instructions
.emplace_back(std::move(phi
));
974 /* the variable was renamed: add new name to renames */
975 if (!(rename
== Temp() || rename
== pair
.first
))
976 ctx
.renames
[block_idx
][pair
.first
] = rename
;
979 /* combine phis with instructions */
981 while (!block
->instructions
[idx
]) {
985 ctx
.register_demand
[block
->index
].erase(ctx
.register_demand
[block
->index
].begin(), ctx
.register_demand
[block
->index
].begin() + idx
);
986 ctx
.register_demand
[block
->index
].insert(ctx
.register_demand
[block
->index
].begin(), instructions
.size(), RegisterDemand());
988 std::vector
<aco_ptr
<Instruction
>>::iterator start
= std::next(block
->instructions
.begin(), idx
);
989 instructions
.insert(instructions
.end(), std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(start
),
990 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
991 block
->instructions
= std::move(instructions
);
994 void process_block(spill_ctx
& ctx
, unsigned block_idx
, Block
* block
,
995 std::map
<Temp
, uint32_t> ¤t_spills
, RegisterDemand spilled_registers
)
997 std::vector
<std::map
<Temp
, uint32_t>> local_next_use_distance
;
998 std::vector
<aco_ptr
<Instruction
>> instructions
;
1001 /* phis are handled separetely */
1002 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
||
1003 block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
) {
1004 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
1005 for (const Operand
& op
: instr
->operands
) {
1006 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
1007 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
1008 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1010 instructions
.emplace_back(std::move(instr
));
1014 if (block
->register_demand
.exceeds(ctx
.target_pressure
))
1015 local_next_use_distance
= local_next_uses(ctx
, block
);
1017 while (idx
< block
->instructions
.size()) {
1018 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
1020 std::map
<Temp
, std::pair
<Temp
, uint32_t>> reloads
;
1021 std::map
<Temp
, uint32_t> spills
;
1022 /* rename and reload operands */
1023 for (Operand
& op
: instr
->operands
) {
1026 if (current_spills
.find(op
.getTemp()) == current_spills
.end()) {
1027 /* the Operand is in register: check if it was renamed */
1028 if (ctx
.renames
[block_idx
].find(op
.getTemp()) != ctx
.renames
[block_idx
].end())
1029 op
.setTemp(ctx
.renames
[block_idx
][op
.getTemp()]);
1030 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
1031 if (ctx
.remat
.count(op
.getTemp()))
1032 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1035 /* the Operand is spilled: add it to reloads */
1036 Temp new_tmp
= {ctx
.program
->allocateId(), op
.regClass()};
1037 ctx
.renames
[block_idx
][op
.getTemp()] = new_tmp
;
1038 reloads
[new_tmp
] = std::make_pair(op
.getTemp(), current_spills
[op
.getTemp()]);
1039 current_spills
.erase(op
.getTemp());
1040 op
.setTemp(new_tmp
);
1041 spilled_registers
-= new_tmp
;
1044 /* check if register demand is low enough before and after the current instruction */
1045 if (block
->register_demand
.exceeds(ctx
.target_pressure
)) {
1047 RegisterDemand new_demand
= ctx
.register_demand
[block_idx
][idx
];
1049 RegisterDemand demand_before
= new_demand
;
1050 for (const Definition
& def
: instr
->definitions
)
1051 demand_before
-= def
.getTemp();
1052 for (const Operand
& op
: instr
->operands
) {
1053 if (op
.isFirstKill())
1054 demand_before
+= op
.getTemp();
1056 new_demand
.update(demand_before
);
1058 new_demand
.update(ctx
.register_demand
[block_idx
][idx
- 1]);
1061 assert(!local_next_use_distance
.empty());
1063 /* if reg pressure is too high, spill variable with furthest next use */
1064 while (RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
)) {
1065 unsigned distance
= 0;
1067 bool do_rematerialize
= false;
1068 if (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
1069 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1070 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1071 if (pair
.first
.type() == RegType::vgpr
&&
1072 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1073 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1074 current_spills
.find(pair
.first
) == current_spills
.end() &&
1075 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1076 to_spill
= pair
.first
;
1077 distance
= pair
.second
;
1078 do_rematerialize
= can_rematerialize
;
1082 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1083 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1084 if (pair
.first
.type() == RegType::sgpr
&&
1085 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1086 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1087 current_spills
.find(pair
.first
) == current_spills
.end() &&
1088 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1089 to_spill
= pair
.first
;
1090 distance
= pair
.second
;
1091 do_rematerialize
= can_rematerialize
;
1096 assert(distance
!= 0 && distance
> idx
);
1097 uint32_t spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
1099 /* add interferences with currently spilled variables */
1100 for (std::pair
<Temp
, uint32_t> pair
: current_spills
) {
1101 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
);
1102 ctx
.interferences
[pair
.second
].second
.emplace(spill_id
);
1104 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1105 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
.second
);
1106 ctx
.interferences
[pair
.second
.second
].second
.emplace(spill_id
);
1109 current_spills
[to_spill
] = spill_id
;
1110 spilled_registers
+= to_spill
;
1112 /* rename if necessary */
1113 if (ctx
.renames
[block_idx
].find(to_spill
) != ctx
.renames
[block_idx
].end()) {
1114 to_spill
= ctx
.renames
[block_idx
][to_spill
];
1117 /* add spill to new instructions */
1118 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
1119 spill
->operands
[0] = Operand(to_spill
);
1120 spill
->operands
[1] = Operand(spill_id
);
1121 instructions
.emplace_back(std::move(spill
));
1125 /* add reloads and instruction to new instructions */
1126 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1127 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.second
.first
, pair
.first
, pair
.second
.second
);
1128 instructions
.emplace_back(std::move(reload
));
1130 instructions
.emplace_back(std::move(instr
));
1134 block
->instructions
= std::move(instructions
);
1135 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1138 void spill_block(spill_ctx
& ctx
, unsigned block_idx
)
1140 Block
* block
= &ctx
.program
->blocks
[block_idx
];
1141 ctx
.processed
[block_idx
] = true;
1143 /* determine set of variables which are spilled at the beginning of the block */
1144 RegisterDemand spilled_registers
= init_live_in_vars(ctx
, block
, block_idx
);
1146 /* add interferences for spilled variables */
1147 for (std::pair
<Temp
, uint32_t> x
: ctx
.spills_entry
[block_idx
]) {
1148 for (std::pair
<Temp
, uint32_t> y
: ctx
.spills_entry
[block_idx
])
1149 if (x
.second
!= y
.second
)
1150 ctx
.interferences
[x
.second
].second
.emplace(y
.second
);
1153 bool is_loop_header
= block
->loop_nest_depth
&& ctx
.loop_header
.top()->index
== block_idx
;
1154 if (!is_loop_header
) {
1155 /* add spill/reload code on incoming control flow edges */
1156 add_coupling_code(ctx
, block
, block_idx
);
1159 std::map
<Temp
, uint32_t> current_spills
= ctx
.spills_entry
[block_idx
];
1161 /* check conditions to process this block */
1162 bool process
= RegisterDemand(block
->register_demand
- spilled_registers
).exceeds(ctx
.target_pressure
) ||
1163 !ctx
.renames
[block_idx
].empty() ||
1164 ctx
.remat_used
.size();
1166 std::map
<Temp
, uint32_t>::iterator it
= current_spills
.begin();
1167 while (!process
&& it
!= current_spills
.end()) {
1168 if (ctx
.next_use_distances_start
[block_idx
][it
->first
].first
== block_idx
)
1174 process_block(ctx
, block_idx
, block
, current_spills
, spilled_registers
);
1176 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1178 /* check if the next block leaves the current loop */
1179 if (block
->loop_nest_depth
== 0 || ctx
.program
->blocks
[block_idx
+ 1].loop_nest_depth
>= block
->loop_nest_depth
)
1182 Block
* loop_header
= ctx
.loop_header
.top();
1184 /* preserve original renames at end of loop header block */
1185 std::map
<Temp
, Temp
> renames
= std::move(ctx
.renames
[loop_header
->index
]);
1187 /* add coupling code to all loop header predecessors */
1188 add_coupling_code(ctx
, loop_header
, loop_header
->index
);
1190 /* update remat_used for phis added in add_coupling_code() */
1191 for (aco_ptr
<Instruction
>& instr
: loop_header
->instructions
) {
1194 for (const Operand
& op
: instr
->operands
) {
1195 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
1196 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1200 /* propagate new renames through loop: i.e. repair the SSA */
1201 renames
.swap(ctx
.renames
[loop_header
->index
]);
1202 for (std::pair
<Temp
, Temp
> rename
: renames
) {
1203 for (unsigned idx
= loop_header
->index
; idx
<= block_idx
; idx
++) {
1204 Block
& current
= ctx
.program
->blocks
[idx
];
1205 std::vector
<aco_ptr
<Instruction
>>::iterator instr_it
= current
.instructions
.begin();
1207 /* first rename phis */
1208 while (instr_it
!= current
.instructions
.end()) {
1209 aco_ptr
<Instruction
>& phi
= *instr_it
;
1210 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
1212 /* no need to rename the loop header phis once again. this happened in add_coupling_code() */
1213 if (idx
== loop_header
->index
) {
1218 for (Operand
& op
: phi
->operands
) {
1221 if (op
.getTemp() == rename
.first
)
1222 op
.setTemp(rename
.second
);
1227 std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= ctx
.next_use_distances_start
[idx
].find(rename
.first
);
1229 /* variable is not live at beginning of this block */
1230 if (it
== ctx
.next_use_distances_start
[idx
].end())
1233 /* if the variable is live at the block's exit, add rename */
1234 if (ctx
.next_use_distances_end
[idx
].find(rename
.first
) != ctx
.next_use_distances_end
[idx
].end())
1235 ctx
.renames
[idx
].insert(rename
);
1237 /* rename all uses in this block */
1238 bool renamed
= false;
1239 while (!renamed
&& instr_it
!= current
.instructions
.end()) {
1240 aco_ptr
<Instruction
>& instr
= *instr_it
;
1241 for (Operand
& op
: instr
->operands
) {
1244 if (op
.getTemp() == rename
.first
) {
1245 op
.setTemp(rename
.second
);
1246 /* we can stop with this block as soon as the variable is spilled */
1247 if (instr
->opcode
== aco_opcode::p_spill
)
1256 /* remove loop header info from stack */
1257 ctx
.loop_header
.pop();
1260 void assign_spill_slots(spill_ctx
& ctx
, unsigned spills_to_vgpr
) {
1261 std::map
<uint32_t, uint32_t> sgpr_slot
;
1262 std::map
<uint32_t, uint32_t> vgpr_slot
;
1263 std::vector
<bool> is_assigned(ctx
.interferences
.size());
1265 /* first, handle affinities: just merge all interferences into both spill ids */
1266 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1267 for (unsigned i
= 0; i
< vec
.size(); i
++) {
1268 for (unsigned j
= i
+ 1; j
< vec
.size(); j
++) {
1269 assert(vec
[i
] != vec
[j
]);
1270 for (uint32_t id
: ctx
.interferences
[vec
[i
]].second
)
1271 ctx
.interferences
[id
].second
.insert(vec
[j
]);
1272 for (uint32_t id
: ctx
.interferences
[vec
[j
]].second
)
1273 ctx
.interferences
[id
].second
.insert(vec
[i
]);
1274 ctx
.interferences
[vec
[i
]].second
.insert(ctx
.interferences
[vec
[j
]].second
.begin(), ctx
.interferences
[vec
[j
]].second
.end());
1275 ctx
.interferences
[vec
[j
]].second
.insert(ctx
.interferences
[vec
[i
]].second
.begin(), ctx
.interferences
[vec
[i
]].second
.end());
1277 bool reloaded
= ctx
.is_reloaded
[vec
[i
]] || ctx
.is_reloaded
[vec
[j
]];
1278 ctx
.is_reloaded
[vec
[i
]] = reloaded
;
1279 ctx
.is_reloaded
[vec
[j
]] = reloaded
;
1283 for (ASSERTED
uint32_t i
= 0; i
< ctx
.interferences
.size(); i
++)
1284 for (ASSERTED
uint32_t id
: ctx
.interferences
[i
].second
)
1287 /* for each spill slot, assign as many spill ids as possible */
1288 std::vector
<std::set
<uint32_t>> spill_slot_interferences
;
1289 unsigned slot_idx
= 0;
1292 /* assign sgpr spill slots */
1295 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1296 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1298 if (ctx
.interferences
[id
].first
.type() != RegType::sgpr
)
1301 /* check interferences */
1302 bool interferes
= false;
1303 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1304 if (i
== spill_slot_interferences
.size())
1305 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1306 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end() || i
/ 64 != slot_idx
/ 64) {
1316 /* we found a spill id which can be assigned to current spill slot */
1317 sgpr_slot
[id
] = slot_idx
;
1318 is_assigned
[id
] = true;
1319 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1320 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1322 /* add all affinities: there are no additional interferences */
1323 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1324 bool found_affinity
= false;
1325 for (uint32_t entry
: vec
) {
1327 found_affinity
= true;
1331 if (!found_affinity
)
1333 for (uint32_t entry
: vec
) {
1334 sgpr_slot
[entry
] = slot_idx
;
1335 is_assigned
[entry
] = true;
1345 /* assign vgpr spill slots */
1348 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1349 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1351 if (ctx
.interferences
[id
].first
.type() != RegType::vgpr
)
1354 /* check interferences */
1355 bool interferes
= false;
1356 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1357 if (i
== spill_slot_interferences
.size())
1358 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1359 /* check for interference and ensure that vector regs are stored next to each other */
1360 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end() || i
/ 64 != slot_idx
/ 64) {
1370 /* we found a spill id which can be assigned to current spill slot */
1371 vgpr_slot
[id
] = slot_idx
;
1372 is_assigned
[id
] = true;
1373 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1374 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1379 for (unsigned id
= 0; id
< is_assigned
.size(); id
++)
1380 assert(is_assigned
[id
] || !ctx
.is_reloaded
[id
]);
1382 for (std::vector
<uint32_t>& vec
: ctx
.affinities
) {
1383 for (unsigned i
= 0; i
< vec
.size(); i
++) {
1384 for (unsigned j
= i
+ 1; j
< vec
.size(); j
++) {
1385 assert(is_assigned
[vec
[i
]] == is_assigned
[vec
[j
]]);
1386 if (!is_assigned
[vec
[i
]])
1388 assert(ctx
.is_reloaded
[vec
[i
]] == ctx
.is_reloaded
[vec
[j
]]);
1389 assert(ctx
.interferences
[vec
[i
]].first
.type() == ctx
.interferences
[vec
[j
]].first
.type());
1390 if (ctx
.interferences
[vec
[i
]].first
.type() == RegType::sgpr
)
1391 assert(sgpr_slot
[vec
[i
]] == sgpr_slot
[vec
[j
]]);
1393 assert(vgpr_slot
[vec
[i
]] == vgpr_slot
[vec
[j
]]);
1398 /* hope, we didn't mess up */
1399 std::vector
<Temp
> vgpr_spill_temps((spill_slot_interferences
.size() + 63) / 64);
1400 assert(vgpr_spill_temps
.size() <= spills_to_vgpr
);
1402 /* replace pseudo instructions with actual hardware instructions */
1403 unsigned last_top_level_block_idx
= 0;
1404 std::vector
<bool> reload_in_loop(vgpr_spill_temps
.size());
1405 for (Block
& block
: ctx
.program
->blocks
) {
1407 /* after loops, we insert a user if there was a reload inside the loop */
1408 if (block
.loop_nest_depth
== 0) {
1410 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1411 if (reload_in_loop
[i
])
1415 if (end_vgprs
> 0) {
1416 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, end_vgprs
, 0)};
1418 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1419 if (reload_in_loop
[i
])
1420 destr
->operands
[k
++] = Operand(vgpr_spill_temps
[i
]);
1421 reload_in_loop
[i
] = false;
1423 /* find insertion point */
1424 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1425 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1427 block
.instructions
.insert(it
, std::move(destr
));
1431 if (block
.kind
& block_kind_top_level
&& !block
.linear_preds
.empty()) {
1432 last_top_level_block_idx
= block
.index
;
1434 /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
1435 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1436 if (vgpr_spill_temps
[i
] == Temp())
1439 bool can_destroy
= true;
1440 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[block
.linear_preds
[0]]) {
1442 if (sgpr_slot
.find(pair
.second
) != sgpr_slot
.end() &&
1443 sgpr_slot
[pair
.second
] / 64 == i
) {
1444 can_destroy
= false;
1449 vgpr_spill_temps
[i
] = Temp();
1453 std::vector
<aco_ptr
<Instruction
>>::iterator it
;
1454 std::vector
<aco_ptr
<Instruction
>> instructions
;
1455 instructions
.reserve(block
.instructions
.size());
1456 for (it
= block
.instructions
.begin(); it
!= block
.instructions
.end(); ++it
) {
1458 if ((*it
)->opcode
== aco_opcode::p_spill
) {
1459 uint32_t spill_id
= (*it
)->operands
[1].constantValue();
1461 if (!ctx
.is_reloaded
[spill_id
]) {
1462 /* never reloaded, so don't spill */
1463 } else if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1465 ctx
.program
->config
->spilled_vgprs
+= (*it
)->operands
[0].size();
1467 assert(false && "vgpr spilling not yet implemented.");
1468 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1469 ctx
.program
->config
->spilled_sgprs
+= (*it
)->operands
[0].size();
1471 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1473 /* check if the linear vgpr already exists */
1474 if (vgpr_spill_temps
[spill_slot
/ 64] == Temp()) {
1475 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1476 vgpr_spill_temps
[spill_slot
/ 64] = linear_vgpr
;
1477 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1478 create
->definitions
[0] = Definition(linear_vgpr
);
1479 /* find the right place to insert this definition */
1480 if (last_top_level_block_idx
== block
.index
) {
1481 /* insert right before the current instruction */
1482 instructions
.emplace_back(std::move(create
));
1484 assert(last_top_level_block_idx
< block
.index
);
1485 /* insert before the branch at last top level block */
1486 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1487 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1491 /* spill sgpr: just add the vgpr temp to operands */
1492 Pseudo_instruction
* spill
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 3, 0);
1493 spill
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ 64]);
1494 spill
->operands
[1] = Operand(spill_slot
% 64);
1495 spill
->operands
[2] = (*it
)->operands
[0];
1496 instructions
.emplace_back(aco_ptr
<Instruction
>(spill
));
1498 unreachable("No spill slot assigned for spill id");
1501 } else if ((*it
)->opcode
== aco_opcode::p_reload
) {
1502 uint32_t spill_id
= (*it
)->operands
[0].constantValue();
1503 assert(ctx
.is_reloaded
[spill_id
]);
1505 if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1507 assert(false && "vgpr spilling not yet implemented.");
1509 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1510 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1511 reload_in_loop
[spill_slot
/ 64] = block
.loop_nest_depth
> 0;
1513 /* check if the linear vgpr already exists */
1514 if (vgpr_spill_temps
[spill_slot
/ 64] == Temp()) {
1515 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1516 vgpr_spill_temps
[spill_slot
/ 64] = linear_vgpr
;
1517 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1518 create
->definitions
[0] = Definition(linear_vgpr
);
1519 /* find the right place to insert this definition */
1520 if (last_top_level_block_idx
== block
.index
) {
1521 /* insert right before the current instruction */
1522 instructions
.emplace_back(std::move(create
));
1524 assert(last_top_level_block_idx
< block
.index
);
1525 /* insert before the branch at last top level block */
1526 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1527 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1531 /* reload sgpr: just add the vgpr temp to operands */
1532 Pseudo_instruction
* reload
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 2, 1);
1533 reload
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ 64]);
1534 reload
->operands
[1] = Operand(spill_slot
% 64);
1535 reload
->definitions
[0] = (*it
)->definitions
[0];
1536 instructions
.emplace_back(aco_ptr
<Instruction
>(reload
));
1538 unreachable("No spill slot assigned for spill id");
1540 } else if (!ctx
.remat_used
.count(it
->get()) || ctx
.remat_used
[it
->get()]) {
1541 instructions
.emplace_back(std::move(*it
));
1545 block
.instructions
= std::move(instructions
);
1548 /* SSA elimination inserts copies for logical phis right before p_logical_end
1549 * So if a linear vgpr is used between that p_logical_end and the branch,
1550 * we need to ensure logical phis don't choose a definition which aliases
1552 * TODO: Moving the spills and reloads to before p_logical_end might produce
1553 * slightly better code. */
1554 for (Block
& block
: ctx
.program
->blocks
) {
1555 /* loops exits are already handled */
1556 if (block
.logical_preds
.size() <= 1)
1559 bool has_logical_phis
= false;
1560 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
1561 if (instr
->opcode
== aco_opcode::p_phi
) {
1562 has_logical_phis
= true;
1564 } else if (instr
->opcode
!= aco_opcode::p_linear_phi
) {
1568 if (!has_logical_phis
)
1571 std::set
<Temp
> vgprs
;
1572 for (unsigned pred_idx
: block
.logical_preds
) {
1573 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
1574 for (int i
= pred
.instructions
.size() - 1; i
>= 0; i
--) {
1575 aco_ptr
<Instruction
>& pred_instr
= pred
.instructions
[i
];
1576 if (pred_instr
->opcode
== aco_opcode::p_logical_end
) {
1578 } else if (pred_instr
->opcode
== aco_opcode::p_spill
||
1579 pred_instr
->opcode
== aco_opcode::p_reload
) {
1580 vgprs
.insert(pred_instr
->operands
[0].getTemp());
1587 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, vgprs
.size(), 0)};
1589 for (Temp tmp
: vgprs
) {
1590 destr
->operands
[k
++] = Operand(tmp
);
1592 /* find insertion point */
1593 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1594 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1596 block
.instructions
.insert(it
, std::move(destr
));
1600 } /* end namespace */
1603 void spill(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
)
1605 program
->config
->spilled_vgprs
= 0;
1606 program
->config
->spilled_sgprs
= 0;
1608 /* no spilling when register pressure is low enough */
1609 if (program
->num_waves
> 0)
1612 /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1613 lower_to_cssa(program
, live_vars
, options
);
1615 /* calculate target register demand */
1616 RegisterDemand register_target
= program
->max_reg_demand
;
1617 if (register_target
.sgpr
> program
->sgpr_limit
)
1618 register_target
.vgpr
+= (register_target
.sgpr
- program
->sgpr_limit
+ 63 + 32) / 64;
1619 register_target
.sgpr
= program
->sgpr_limit
;
1621 if (register_target
.vgpr
> program
->vgpr_limit
)
1622 register_target
.sgpr
= program
->sgpr_limit
- 5;
1623 register_target
.vgpr
= program
->vgpr_limit
- (register_target
.vgpr
- program
->max_reg_demand
.vgpr
);
1625 int spills_to_vgpr
= (program
->max_reg_demand
.sgpr
- register_target
.sgpr
+ 63 + 32) / 64;
1627 /* initialize ctx */
1628 spill_ctx
ctx(register_target
, program
, live_vars
.register_demand
);
1629 compute_global_next_uses(ctx
, live_vars
.live_out
);
1630 get_rematerialize_info(ctx
);
1632 /* create spills and reloads */
1633 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++)
1634 spill_block(ctx
, i
);
1636 /* assign spill slots and DCE rematerialized code */
1637 assign_spill_slots(ctx
, spills_to_vgpr
);
1639 /* update live variable information */
1640 live_vars
= live_var_analysis(program
, options
);
1642 assert(program
->num_waves
>= 0);