aco: Lower to CSSA
[mesa.git] / src / amd / compiler / aco_spill.cpp
1 /*
2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 */
25
26 #include "aco_ir.h"
27 #include <map>
28 #include <stack>
29 #include "vulkan/radv_shader.h"
30
31
32 /*
33 * Implements the spilling algorithm on SSA-form from
34 * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
35 * by Matthias Braun and Sebastian Hack.
36 */
37
38 namespace aco {
39
40 namespace {
41
42 struct remat_info {
43 Instruction *instr;
44 };
45
46 struct spill_ctx {
47 RegisterDemand target_pressure;
48 Program* program;
49 std::vector<std::vector<RegisterDemand>> register_demand;
50 std::vector<std::map<Temp, Temp>> renames;
51 std::vector<std::map<Temp, uint32_t>> spills_entry;
52 std::vector<std::map<Temp, uint32_t>> spills_exit;
53 std::vector<bool> processed;
54 std::stack<Block*> loop_header;
55 std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start;
56 std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end;
57 std::vector<std::pair<RegClass, std::set<uint32_t>>> interferences;
58 std::vector<std::pair<uint32_t, uint32_t>> affinities;
59 std::vector<bool> is_reloaded;
60 std::map<Temp, remat_info> remat;
61 std::map<Instruction *, bool> remat_used;
62
63 spill_ctx(const RegisterDemand target_pressure, Program* program,
64 std::vector<std::vector<RegisterDemand>> register_demand)
65 : target_pressure(target_pressure), program(program),
66 register_demand(register_demand), renames(program->blocks.size()),
67 spills_entry(program->blocks.size()), spills_exit(program->blocks.size()),
68 processed(program->blocks.size(), false) {}
69
70 uint32_t allocate_spill_id(RegClass rc)
71 {
72 interferences.emplace_back(rc, std::set<uint32_t>());
73 is_reloaded.push_back(false);
74 return next_spill_id++;
75 }
76
77 uint32_t next_spill_id = 0;
78 };
79
80 int32_t get_dominator(int idx_a, int idx_b, Program* program, bool is_linear)
81 {
82
83 if (idx_a == -1)
84 return idx_b;
85 if (idx_b == -1)
86 return idx_a;
87 if (is_linear) {
88 while (idx_a != idx_b) {
89 if (idx_a > idx_b)
90 idx_a = program->blocks[idx_a].linear_idom;
91 else
92 idx_b = program->blocks[idx_b].linear_idom;
93 }
94 } else {
95 while (idx_a != idx_b) {
96 if (idx_a > idx_b)
97 idx_a = program->blocks[idx_a].logical_idom;
98 else
99 idx_b = program->blocks[idx_b].logical_idom;
100 }
101 }
102 assert(idx_a != -1);
103 return idx_a;
104 }
105
106 void next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set<uint32_t>& worklist)
107 {
108 Block* block = &ctx.program->blocks[block_idx];
109 std::map<Temp, std::pair<uint32_t, uint32_t>> next_uses = ctx.next_use_distances_end[block_idx];
110
111 /* to compute the next use distance at the beginning of the block, we have to add the block's size */
112 for (std::map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = next_uses.begin(); it != next_uses.end();) {
113 it->second.second = it->second.second + block->instructions.size();
114
115 /* remove the live out exec mask as we really don't want to spill it */
116 if (it->first == block->live_out_exec)
117 it = next_uses.erase(it);
118 else
119 ++it;
120 }
121
122 int idx = block->instructions.size() - 1;
123 while (idx >= 0) {
124 aco_ptr<Instruction>& instr = block->instructions[idx];
125
126 if (instr->opcode == aco_opcode::p_linear_phi ||
127 instr->opcode == aco_opcode::p_phi)
128 break;
129
130 for (const Definition& def : instr->definitions) {
131 if (def.isTemp())
132 next_uses.erase(def.getTemp());
133 }
134
135 for (const Operand& op : instr->operands) {
136 /* omit exec mask */
137 if (op.isFixed() && op.physReg() == exec)
138 continue;
139 if (op.isTemp())
140 next_uses[op.getTemp()] = {block_idx, idx};
141 }
142 idx--;
143 }
144
145 assert(block_idx != 0 || next_uses.empty());
146 ctx.next_use_distances_start[block_idx] = next_uses;
147 while (idx >= 0) {
148 aco_ptr<Instruction>& instr = block->instructions[idx];
149 assert(instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi);
150
151 for (unsigned i = 0; i < instr->operands.size(); i++) {
152 unsigned pred_idx = instr->opcode == aco_opcode::p_phi ?
153 block->logical_preds[i] :
154 block->linear_preds[i];
155 if (instr->operands[i].isTemp()) {
156 if (ctx.next_use_distances_end[pred_idx].find(instr->operands[i].getTemp()) == ctx.next_use_distances_end[pred_idx].end() ||
157 ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] != std::pair<uint32_t, uint32_t>{block_idx, 0})
158 worklist.insert(pred_idx);
159 ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] = {block_idx, 0};
160 }
161 }
162 next_uses.erase(instr->definitions[0].getTemp());
163 idx--;
164 }
165
166 /* all remaining live vars must be live-out at the predecessors */
167 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : next_uses) {
168 Temp temp = pair.first;
169 uint32_t distance = pair.second.second;
170 uint32_t dom = pair.second.first;
171 std::vector<unsigned>& preds = temp.is_linear() ? block->linear_preds : block->logical_preds;
172 for (unsigned pred_idx : preds) {
173 if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth)
174 distance += 0xFFFF;
175 if (ctx.next_use_distances_end[pred_idx].find(temp) != ctx.next_use_distances_end[pred_idx].end()) {
176 dom = get_dominator(dom, ctx.next_use_distances_end[pred_idx][temp].first, ctx.program, temp.is_linear());
177 distance = std::min(ctx.next_use_distances_end[pred_idx][temp].second, distance);
178 }
179 if (ctx.next_use_distances_end[pred_idx][temp] != std::pair<uint32_t, uint32_t>{dom, distance})
180 worklist.insert(pred_idx);
181 ctx.next_use_distances_end[pred_idx][temp] = {dom, distance};
182 }
183 }
184
185 }
186
187 void compute_global_next_uses(spill_ctx& ctx, std::vector<std::set<Temp>>& live_out)
188 {
189 ctx.next_use_distances_start.resize(ctx.program->blocks.size());
190 ctx.next_use_distances_end.resize(ctx.program->blocks.size());
191 std::set<uint32_t> worklist;
192 for (Block& block : ctx.program->blocks)
193 worklist.insert(block.index);
194
195 while (!worklist.empty()) {
196 std::set<unsigned>::reverse_iterator b_it = worklist.rbegin();
197 unsigned block_idx = *b_it;
198 worklist.erase(block_idx);
199 next_uses_per_block(ctx, block_idx, worklist);
200 }
201 }
202
203 bool should_rematerialize(aco_ptr<Instruction>& instr)
204 {
205 /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
206 if (instr->format != Format::VOP1 && instr->format != Format::SOP1 && instr->format != Format::PSEUDO)
207 return false;
208 /* TODO: pseudo-instruction rematerialization is only supported for p_create_vector */
209 if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_create_vector)
210 return false;
211
212 for (const Operand& op : instr->operands) {
213 /* TODO: rematerialization using temporaries isn't yet supported */
214 if (op.isTemp())
215 return false;
216 }
217
218 /* TODO: rematerialization with multiple definitions isn't yet supported */
219 if (instr->definitions.size() > 1)
220 return false;
221
222 return true;
223 }
224
225 aco_ptr<Instruction> do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
226 {
227 std::map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp);
228 if (remat != ctx.remat.end()) {
229 Instruction *instr = remat->second.instr;
230 assert((instr->format == Format::VOP1 || instr->format == Format::SOP1 || instr->format == Format::PSEUDO) && "unsupported");
231 assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector) && "unsupported");
232 assert(instr->definitions.size() == 1 && "unsupported");
233
234 aco_ptr<Instruction> res;
235 if (instr->format == Format::VOP1) {
236 res.reset(create_instruction<VOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
237 } else if (instr->format == Format::SOP1) {
238 res.reset(create_instruction<SOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
239 } else if (instr->format == Format::PSEUDO) {
240 res.reset(create_instruction<Instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
241 }
242 for (unsigned i = 0; i < instr->operands.size(); i++) {
243 res->operands[i] = instr->operands[i];
244 if (instr->operands[i].isTemp()) {
245 assert(false && "unsupported");
246 if (ctx.remat.count(instr->operands[i].getTemp()))
247 ctx.remat_used[ctx.remat[instr->operands[i].getTemp()].instr] = true;
248 }
249 }
250 res->definitions[0] = Definition(new_name);
251 return res;
252 } else {
253 aco_ptr<Pseudo_instruction> reload{create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
254 reload->operands[0] = Operand(spill_id);
255 reload->definitions[0] = Definition(new_name);
256 ctx.is_reloaded[spill_id] = true;
257 return reload;
258 }
259 }
260
261 void get_rematerialize_info(spill_ctx& ctx)
262 {
263 for (Block& block : ctx.program->blocks) {
264 bool logical = false;
265 for (aco_ptr<Instruction>& instr : block.instructions) {
266 if (instr->opcode == aco_opcode::p_logical_start)
267 logical = true;
268 else if (instr->opcode == aco_opcode::p_logical_end)
269 logical = false;
270 if (logical && should_rematerialize(instr)) {
271 for (const Definition& def : instr->definitions) {
272 if (def.isTemp()) {
273 ctx.remat[def.getTemp()] = (remat_info){instr.get()};
274 ctx.remat_used[instr.get()] = false;
275 }
276 }
277 }
278 }
279 }
280 }
281
282 std::vector<std::map<Temp, uint32_t>> local_next_uses(spill_ctx& ctx, Block* block)
283 {
284 std::vector<std::map<Temp, uint32_t>> local_next_uses(block->instructions.size());
285
286 std::map<Temp, uint32_t> next_uses;
287 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block->index]) {
288 /* omit live out exec mask */
289 if (pair.first == block->live_out_exec)
290 continue;
291
292 next_uses[pair.first] = pair.second.second + block->instructions.size();
293 }
294
295 for (int idx = block->instructions.size() - 1; idx >= 0; idx--) {
296 aco_ptr<Instruction>& instr = block->instructions[idx];
297 if (!instr)
298 break;
299 if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi)
300 break;
301
302 for (const Operand& op : instr->operands) {
303 if (op.isFixed() && op.physReg() == exec)
304 continue;
305 if (op.isTemp())
306 next_uses[op.getTemp()] = idx;
307 }
308 for (const Definition& def : instr->definitions) {
309 if (def.isTemp())
310 next_uses.erase(def.getTemp());
311 }
312 local_next_uses[idx] = next_uses;
313 }
314 return local_next_uses;
315 }
316
317
318 RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
319 {
320 RegisterDemand spilled_registers;
321
322 /* first block, nothing was spilled before */
323 if (block_idx == 0)
324 return {0, 0};
325
326 /* loop header block */
327 if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) {
328 assert(block->linear_preds[0] == block_idx - 1);
329 assert(block->logical_preds[0] == block_idx - 1);
330
331 /* create new loop_info */
332 ctx.loop_header.emplace(block);
333
334 /* check how many live-through variables should be spilled */
335 RegisterDemand new_demand;
336 unsigned i = block_idx;
337 while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) {
338 assert(ctx.program->blocks.size() > i);
339 new_demand.update(ctx.program->blocks[i].register_demand);
340 i++;
341 }
342 unsigned loop_end = i;
343
344 /* select live-through vgpr variables */
345 while (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) {
346 unsigned distance = 0;
347 Temp to_spill;
348 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block_idx - 1]) {
349 if (pair.first.type() == RegType::vgpr &&
350 pair.second.first >= loop_end &&
351 pair.second.second > distance &&
352 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
353 to_spill = pair.first;
354 distance = pair.second.second;
355 }
356 }
357 if (distance == 0)
358 break;
359
360 uint32_t spill_id;
361 if (ctx.spills_exit[block_idx - 1].find(to_spill) == ctx.spills_exit[block_idx - 1].end()) {
362 spill_id = ctx.allocate_spill_id(to_spill.regClass());
363 } else {
364 spill_id = ctx.spills_exit[block_idx - 1][to_spill];
365 }
366
367 ctx.spills_entry[block_idx][to_spill] = spill_id;
368 spilled_registers.vgpr += to_spill.size();
369 }
370
371 /* select live-through sgpr variables */
372 while (new_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) {
373 unsigned distance = 0;
374 Temp to_spill;
375 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block_idx - 1]) {
376 if (pair.first.type() == RegType::sgpr &&
377 pair.second.first >= loop_end &&
378 pair.second.second > distance &&
379 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
380 to_spill = pair.first;
381 distance = pair.second.second;
382 }
383 }
384 if (distance == 0)
385 break;
386
387 uint32_t spill_id;
388 if (ctx.spills_exit[block_idx - 1].find(to_spill) == ctx.spills_exit[block_idx - 1].end()) {
389 spill_id = ctx.allocate_spill_id(to_spill.regClass());
390 } else {
391 spill_id = ctx.spills_exit[block_idx - 1][to_spill];
392 }
393
394 ctx.spills_entry[block_idx][to_spill] = spill_id;
395 spilled_registers.sgpr += to_spill.size();
396 }
397
398
399
400 /* shortcut */
401 if (!RegisterDemand(new_demand - spilled_registers).exceeds(ctx.target_pressure))
402 return spilled_registers;
403
404 /* if reg pressure is too high at beginning of loop, add variables with furthest use */
405 unsigned idx = 0;
406 while (block->instructions[idx]->opcode == aco_opcode::p_phi || block->instructions[idx]->opcode == aco_opcode::p_linear_phi)
407 idx++;
408
409 assert(idx != 0 && "loop without phis: TODO");
410 idx--;
411 RegisterDemand reg_pressure = ctx.register_demand[block_idx][idx] - spilled_registers;
412 while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
413 unsigned distance = 0;
414 Temp to_spill;
415 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
416 if (pair.first.type() == RegType::sgpr &&
417 pair.second.second > distance &&
418 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
419 to_spill = pair.first;
420 distance = pair.second.second;
421 }
422 }
423 assert(distance != 0);
424
425 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
426 spilled_registers.sgpr += to_spill.size();
427 reg_pressure.sgpr -= to_spill.size();
428 }
429 while (reg_pressure.vgpr > ctx.target_pressure.vgpr) {
430 unsigned distance = 0;
431 Temp to_spill;
432 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
433 if (pair.first.type() == RegType::vgpr &&
434 pair.second.second > distance &&
435 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
436 to_spill = pair.first;
437 distance = pair.second.second;
438 }
439 }
440 assert(distance != 0);
441 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
442 spilled_registers.vgpr += to_spill.size();
443 reg_pressure.vgpr -= to_spill.size();
444 }
445
446 return spilled_registers;
447 }
448
449 /* branch block */
450 if (block->linear_preds.size() == 1) {
451 /* keep variables spilled if they are alive and not used in the current block */
452 unsigned pred_idx = block->linear_preds[0];
453 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
454 if (pair.first.type() == RegType::sgpr &&
455 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
456 ctx.next_use_distances_start[block_idx][pair.first].second > block_idx) {
457 ctx.spills_entry[block_idx].insert(pair);
458 spilled_registers.sgpr += pair.first.size();
459 }
460 }
461 if (block->logical_preds.size() == 1) {
462 pred_idx = block->logical_preds[0];
463 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
464 if (pair.first.type() == RegType::vgpr &&
465 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
466 ctx.next_use_distances_end[pred_idx][pair.first].second > block_idx) {
467 ctx.spills_entry[block_idx].insert(pair);
468 spilled_registers.vgpr += pair.first.size();
469 }
470 }
471 }
472
473 /* if register demand is still too high, we just keep all spilled live vars and process the block */
474 if (block->register_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) {
475 pred_idx = block->linear_preds[0];
476 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
477 if (pair.first.type() == RegType::sgpr &&
478 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
479 ctx.spills_entry[block_idx].insert(pair).second) {
480 spilled_registers.sgpr += pair.first.size();
481 }
482 }
483 }
484 if (block->register_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr && block->logical_preds.size() == 1) {
485 pred_idx = block->logical_preds[0];
486 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
487 if (pair.first.type() == RegType::vgpr &&
488 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
489 ctx.spills_entry[block_idx].insert(pair).second) {
490 spilled_registers.vgpr += pair.first.size();
491 }
492 }
493 }
494
495 return spilled_registers;
496 }
497
498 /* else: merge block */
499 std::set<Temp> partial_spills;
500
501 /* keep variables spilled on all incoming paths */
502 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
503 std::vector<unsigned>& preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
504 /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload it.
505 * Otherwise, if any predecessor reloads it, ensure it's reloaded on all other predecessors.
506 * The idea is that it's better in practice to rematerialize redundantly than to create lots of phis. */
507 /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db doesn't seem to exercise this path much) */
508 bool remat = ctx.remat.count(pair.first);
509 bool spill = !remat;
510 uint32_t spill_id = 0;
511 for (unsigned pred_idx : preds) {
512 /* variable is not even live at the predecessor: probably from a phi */
513 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end()) {
514 spill = false;
515 break;
516 }
517 if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end()) {
518 if (!remat)
519 spill = false;
520 } else {
521 partial_spills.insert(pair.first);
522 /* it might be that on one incoming path, the variable has a different spill_id, but add_couple_code() will take care of that. */
523 spill_id = ctx.spills_exit[pred_idx][pair.first];
524 if (remat)
525 spill = true;
526 }
527 }
528 if (spill) {
529 ctx.spills_entry[block_idx][pair.first] = spill_id;
530 partial_spills.erase(pair.first);
531 spilled_registers += pair.first;
532 }
533 }
534
535 /* same for phis */
536 unsigned idx = 0;
537 while (block->instructions[idx]->opcode == aco_opcode::p_linear_phi ||
538 block->instructions[idx]->opcode == aco_opcode::p_phi) {
539 aco_ptr<Instruction>& phi = block->instructions[idx];
540 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
541 bool spill = true;
542
543 for (unsigned i = 0; i < phi->operands.size(); i++) {
544 if (phi->operands[i].isUndefined())
545 continue;
546 assert(phi->operands[i].isTemp());
547 if (ctx.spills_exit[preds[i]].find(phi->operands[i].getTemp()) == ctx.spills_exit[preds[i]].end())
548 spill = false;
549 else
550 partial_spills.insert(phi->definitions[0].getTemp());
551 }
552 if (spill) {
553 ctx.spills_entry[block_idx][phi->definitions[0].getTemp()] = ctx.allocate_spill_id(phi->definitions[0].regClass());
554 partial_spills.erase(phi->definitions[0].getTemp());
555 spilled_registers += phi->definitions[0].getTemp();
556 }
557
558 idx++;
559 }
560
561 /* if reg pressure at first instruction is still too high, add partially spilled variables */
562 RegisterDemand reg_pressure;
563 if (idx == 0) {
564 for (const Definition& def : block->instructions[idx]->definitions) {
565 if (def.isTemp()) {
566 reg_pressure -= def.getTemp();
567 }
568 }
569 for (const Operand& op : block->instructions[idx]->operands) {
570 if (op.isTemp() && op.isFirstKill()) {
571 reg_pressure += op.getTemp();
572 }
573 }
574 } else {
575 idx--;
576 }
577 reg_pressure += ctx.register_demand[block_idx][idx] - spilled_registers;
578
579 while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
580 assert(!partial_spills.empty());
581
582 std::set<Temp>::iterator it = partial_spills.begin();
583 Temp to_spill = *it;
584 unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
585 while (it != partial_spills.end()) {
586 assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
587
588 if (it->type() == RegType::sgpr && ctx.next_use_distances_start[block_idx][*it].second > distance) {
589 distance = ctx.next_use_distances_start[block_idx][*it].second;
590 to_spill = *it;
591 }
592 ++it;
593 }
594 assert(distance != 0);
595
596 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
597 partial_spills.erase(to_spill);
598 spilled_registers.sgpr += to_spill.size();
599 reg_pressure.sgpr -= to_spill.size();
600 }
601
602 while (reg_pressure.vgpr > ctx.target_pressure.vgpr) {
603 assert(!partial_spills.empty());
604
605 std::set<Temp>::iterator it = partial_spills.begin();
606 Temp to_spill = *it;
607 unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
608 while (it != partial_spills.end()) {
609 assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
610
611 if (it->type() == RegType::vgpr && ctx.next_use_distances_start[block_idx][*it].second > distance) {
612 distance = ctx.next_use_distances_start[block_idx][*it].second;
613 to_spill = *it;
614 }
615 ++it;
616 }
617 assert(distance != 0);
618
619 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
620 partial_spills.erase(to_spill);
621 spilled_registers.vgpr += to_spill.size();
622 reg_pressure.vgpr -= to_spill.size();
623 }
624
625 return spilled_registers;
626 }
627
628
629 void add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
630 {
631 /* no coupling code necessary */
632 if (block->linear_preds.size() == 0)
633 return;
634
635 std::vector<aco_ptr<Instruction>> instructions;
636 /* branch block: TODO take other branch into consideration */
637 if (block->linear_preds.size() == 1) {
638 assert(ctx.processed[block->linear_preds[0]]);
639
640 if (block->logical_preds.size() == 1) {
641 unsigned pred_idx = block->logical_preds[0];
642 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> live : ctx.next_use_distances_start[block_idx]) {
643 if (live.first.type() == RegType::sgpr)
644 continue;
645 /* still spilled */
646 if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end())
647 continue;
648
649 /* in register at end of predecessor */
650 if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) {
651 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first);
652 if (it != ctx.renames[pred_idx].end())
653 ctx.renames[block_idx].insert(*it);
654 continue;
655 }
656
657 /* variable is spilled at predecessor and live at current block: create reload instruction */
658 Temp new_name = {ctx.program->allocateId(), live.first.regClass()};
659 aco_ptr<Instruction> reload = do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]);
660 instructions.emplace_back(std::move(reload));
661 ctx.renames[block_idx][live.first] = new_name;
662 }
663 }
664
665 unsigned pred_idx = block->linear_preds[0];
666 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> live : ctx.next_use_distances_start[block_idx]) {
667 if (live.first.type() == RegType::vgpr)
668 continue;
669 /* still spilled */
670 if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end())
671 continue;
672
673 /* in register at end of predecessor */
674 if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) {
675 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first);
676 if (it != ctx.renames[pred_idx].end())
677 ctx.renames[block_idx].insert(*it);
678 continue;
679 }
680
681 /* variable is spilled at predecessor and live at current block: create reload instruction */
682 Temp new_name = {ctx.program->allocateId(), live.first.regClass()};
683 aco_ptr<Instruction> reload = do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]);
684 instructions.emplace_back(std::move(reload));
685 ctx.renames[block_idx][live.first] = new_name;
686 }
687
688 /* combine new reload instructions with original block */
689 if (!instructions.empty()) {
690 unsigned insert_idx = 0;
691 while (block->instructions[insert_idx]->opcode == aco_opcode::p_phi ||
692 block->instructions[insert_idx]->opcode == aco_opcode::p_linear_phi) {
693 insert_idx++;
694 }
695 ctx.register_demand[block->index].insert(std::next(ctx.register_demand[block->index].begin(), insert_idx),
696 instructions.size(), RegisterDemand());
697 block->instructions.insert(std::next(block->instructions.begin(), insert_idx),
698 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(instructions.begin()),
699 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(instructions.end()));
700 }
701 return;
702 }
703
704 /* loop header and merge blocks: check if all (linear) predecessors have been processed */
705 for (ASSERTED unsigned pred : block->linear_preds)
706 assert(ctx.processed[pred]);
707
708 /* iterate the phi nodes for which operands to spill at the predecessor */
709 for (aco_ptr<Instruction>& phi : block->instructions) {
710 if (phi->opcode != aco_opcode::p_phi &&
711 phi->opcode != aco_opcode::p_linear_phi)
712 break;
713
714 /* if the phi is not spilled, add to instructions */
715 if (ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == ctx.spills_entry[block_idx].end()) {
716 instructions.emplace_back(std::move(phi));
717 continue;
718 }
719
720 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
721 uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
722
723 for (unsigned i = 0; i < phi->operands.size(); i++) {
724 if (phi->operands[i].isUndefined())
725 continue;
726
727 unsigned pred_idx = preds[i];
728 assert(phi->operands[i].isTemp() && phi->operands[i].isKill());
729 Temp var = phi->operands[i].getTemp();
730
731 /* build interferences between the phi def and all spilled variables at the predecessor blocks */
732 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
733 if (var == pair.first)
734 continue;
735 ctx.interferences[def_spill_id].second.emplace(pair.second);
736 ctx.interferences[pair.second].second.emplace(def_spill_id);
737 }
738
739 /* check if variable is already spilled at predecessor */
740 std::map<Temp, uint32_t>::iterator spilled = ctx.spills_exit[pred_idx].find(var);
741 if (spilled != ctx.spills_exit[pred_idx].end()) {
742 if (spilled->second != def_spill_id)
743 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{def_spill_id, spilled->second});
744 continue;
745 }
746
747 /* rename if necessary */
748 std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
749 if (rename_it != ctx.renames[pred_idx].end()) {
750 var = rename_it->second;
751 ctx.renames[pred_idx].erase(rename_it);
752 }
753
754 uint32_t spill_id = ctx.allocate_spill_id(phi->definitions[0].regClass());
755 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{def_spill_id, spill_id});
756 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
757 spill->operands[0] = Operand(var);
758 spill->operands[1] = Operand(spill_id);
759 Block& pred = ctx.program->blocks[pred_idx];
760 unsigned idx = pred.instructions.size();
761 do {
762 assert(idx != 0);
763 idx--;
764 } while (phi->opcode == aco_opcode::p_phi && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
765 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
766 pred.instructions.insert(it, std::move(spill));
767 ctx.spills_exit[pred_idx][phi->operands[i].getTemp()] = spill_id;
768 }
769
770 /* remove phi from instructions */
771 phi.reset();
772 }
773
774 /* iterate all (other) spilled variables for which to spill at the predecessor */
775 // TODO: would be better to have them sorted: first vgprs and first with longest distance
776 for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
777 std::vector<unsigned> preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
778
779 for (unsigned pred_idx : preds) {
780 /* add interferences between spilled variable and predecessors exit spills */
781 for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx]) {
782 if (exit_spill.first == pair.first)
783 continue;
784 ctx.interferences[exit_spill.second].second.emplace(pair.second);
785 ctx.interferences[pair.second].second.emplace(exit_spill.second);
786 }
787
788 /* variable is already spilled at predecessor */
789 std::map<Temp, uint32_t>::iterator spilled = ctx.spills_exit[pred_idx].find(pair.first);
790 if (spilled != ctx.spills_exit[pred_idx].end()) {
791 if (spilled->second != pair.second)
792 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{pair.second, spilled->second});
793 continue;
794 }
795
796 /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */
797 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end())
798 continue;
799
800 /* variable is in register at predecessor and has to be spilled */
801 /* rename if necessary */
802 Temp var = pair.first;
803 std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
804 if (rename_it != ctx.renames[pred_idx].end()) {
805 var = rename_it->second;
806 ctx.renames[pred_idx].erase(rename_it);
807 }
808
809 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
810 spill->operands[0] = Operand(var);
811 spill->operands[1] = Operand(pair.second);
812 Block& pred = ctx.program->blocks[pred_idx];
813 unsigned idx = pred.instructions.size();
814 do {
815 assert(idx != 0);
816 idx--;
817 } while (pair.first.type() == RegType::vgpr && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
818 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
819 pred.instructions.insert(it, std::move(spill));
820 ctx.spills_exit[pred.index][pair.first] = pair.second;
821 }
822 }
823
824 /* iterate phis for which operands to reload */
825 for (aco_ptr<Instruction>& phi : instructions) {
826 assert(phi->opcode == aco_opcode::p_phi || phi->opcode == aco_opcode::p_linear_phi);
827 assert(ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == ctx.spills_entry[block_idx].end());
828
829 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
830 for (unsigned i = 0; i < phi->operands.size(); i++) {
831 if (!phi->operands[i].isTemp())
832 continue;
833 unsigned pred_idx = preds[i];
834
835 /* rename operand */
836 if (ctx.spills_exit[pred_idx].find(phi->operands[i].getTemp()) == ctx.spills_exit[pred_idx].end()) {
837 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(phi->operands[i].getTemp());
838 if (it != ctx.renames[pred_idx].end())
839 phi->operands[i].setTemp(it->second);
840 continue;
841 }
842
843 Temp tmp = phi->operands[i].getTemp();
844
845 /* reload phi operand at end of predecessor block */
846 Temp new_name = {ctx.program->allocateId(), tmp.regClass()};
847 Block& pred = ctx.program->blocks[pred_idx];
848 unsigned idx = pred.instructions.size();
849 do {
850 assert(idx != 0);
851 idx--;
852 } while (phi->opcode == aco_opcode::p_phi && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
853 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
854
855 aco_ptr<Instruction> reload = do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]);
856 pred.instructions.insert(it, std::move(reload));
857
858 ctx.spills_exit[pred_idx].erase(tmp);
859 ctx.renames[pred_idx][tmp] = new_name;
860 phi->operands[i].setTemp(new_name);
861 }
862 }
863
864 /* iterate live variables for which to reload */
865 // TODO: reload at current block if variable is spilled on all predecessors
866 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
867 /* skip spilled variables */
868 if (ctx.spills_entry[block_idx].find(pair.first) != ctx.spills_entry[block_idx].end())
869 continue;
870 std::vector<unsigned> preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
871
872 /* variable is dead at predecessor, it must be from a phi */
873 bool is_dead = false;
874 for (unsigned pred_idx : preds) {
875 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end())
876 is_dead = true;
877 }
878 if (is_dead)
879 continue;
880 for (unsigned pred_idx : preds) {
881 /* the variable is not spilled at the predecessor */
882 if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end())
883 continue;
884
885 /* variable is spilled at predecessor and has to be reloaded */
886 Temp new_name = {ctx.program->allocateId(), pair.first.regClass()};
887 Block& pred = ctx.program->blocks[pred_idx];
888 unsigned idx = pred.instructions.size();
889 do {
890 assert(idx != 0);
891 idx--;
892 } while (pair.first.type() == RegType::vgpr && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
893 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
894
895 aco_ptr<Instruction> reload = do_reload(ctx, pair.first, new_name, ctx.spills_exit[pred.index][pair.first]);
896 pred.instructions.insert(it, std::move(reload));
897
898 ctx.spills_exit[pred.index].erase(pair.first);
899 ctx.renames[pred.index][pair.first] = new_name;
900 }
901
902 /* check if we have to create a new phi for this variable */
903 Temp rename = Temp();
904 bool is_same = true;
905 for (unsigned pred_idx : preds) {
906 if (ctx.renames[pred_idx].find(pair.first) == ctx.renames[pred_idx].end()) {
907 if (rename == Temp())
908 rename = pair.first;
909 else
910 is_same = rename == pair.first;
911 } else {
912 if (rename == Temp())
913 rename = ctx.renames[pred_idx][pair.first];
914 else
915 is_same = rename == ctx.renames[pred_idx][pair.first];
916 }
917
918 if (!is_same)
919 break;
920 }
921
922 if (!is_same) {
923 /* the variable was renamed differently in the predecessors: we have to create a phi */
924 aco_opcode opcode = pair.first.type() == RegType::vgpr ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
925 aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
926 rename = {ctx.program->allocateId(), pair.first.regClass()};
927 for (unsigned i = 0; i < phi->operands.size(); i++) {
928 Temp tmp;
929 if (ctx.renames[preds[i]].find(pair.first) != ctx.renames[preds[i]].end())
930 tmp = ctx.renames[preds[i]][pair.first];
931 else if (preds[i] >= block_idx)
932 tmp = rename;
933 else
934 tmp = pair.first;
935 phi->operands[i] = Operand(tmp);
936 }
937 phi->definitions[0] = Definition(rename);
938 instructions.emplace_back(std::move(phi));
939 }
940
941 /* the variable was renamed: add new name to renames */
942 if (!(rename == Temp() || rename == pair.first))
943 ctx.renames[block_idx][pair.first] = rename;
944 }
945
946 /* combine phis with instructions */
947 unsigned idx = 0;
948 while (!block->instructions[idx]) {
949 idx++;
950 }
951
952 ctx.register_demand[block->index].erase(ctx.register_demand[block->index].begin(), ctx.register_demand[block->index].begin() + idx);
953 ctx.register_demand[block->index].insert(ctx.register_demand[block->index].begin(), instructions.size(), RegisterDemand());
954
955 std::vector<aco_ptr<Instruction>>::iterator start = std::next(block->instructions.begin(), idx);
956 instructions.insert(instructions.end(), std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(start),
957 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(block->instructions.end()));
958 block->instructions = std::move(instructions);
959 }
960
961 void process_block(spill_ctx& ctx, unsigned block_idx, Block* block,
962 std::map<Temp, uint32_t> &current_spills, RegisterDemand spilled_registers)
963 {
964 std::vector<std::map<Temp, uint32_t>> local_next_use_distance;
965 std::vector<aco_ptr<Instruction>> instructions;
966 unsigned idx = 0;
967
968 /* phis are handled separetely */
969 while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
970 block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
971 aco_ptr<Instruction>& instr = block->instructions[idx];
972 for (const Operand& op : instr->operands) {
973 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
974 if (op.isTemp() && ctx.remat.count(op.getTemp()))
975 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
976 }
977 instructions.emplace_back(std::move(instr));
978 idx++;
979 }
980
981 if (block->register_demand.exceeds(ctx.target_pressure))
982 local_next_use_distance = local_next_uses(ctx, block);
983
984 while (idx < block->instructions.size()) {
985 aco_ptr<Instruction>& instr = block->instructions[idx];
986
987 std::map<Temp, std::pair<Temp, uint32_t>> reloads;
988 std::map<Temp, uint32_t> spills;
989 /* rename and reload operands */
990 for (Operand& op : instr->operands) {
991 if (!op.isTemp())
992 continue;
993 if (current_spills.find(op.getTemp()) == current_spills.end()) {
994 /* the Operand is in register: check if it was renamed */
995 if (ctx.renames[block_idx].find(op.getTemp()) != ctx.renames[block_idx].end())
996 op.setTemp(ctx.renames[block_idx][op.getTemp()]);
997 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
998 if (ctx.remat.count(op.getTemp()))
999 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
1000 continue;
1001 }
1002 /* the Operand is spilled: add it to reloads */
1003 Temp new_tmp = {ctx.program->allocateId(), op.regClass()};
1004 ctx.renames[block_idx][op.getTemp()] = new_tmp;
1005 reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]);
1006 current_spills.erase(op.getTemp());
1007 op.setTemp(new_tmp);
1008 spilled_registers -= new_tmp;
1009 }
1010
1011 /* check if register demand is low enough before and after the current instruction */
1012 if (block->register_demand.exceeds(ctx.target_pressure)) {
1013
1014 RegisterDemand new_demand = ctx.register_demand[block_idx][idx];
1015 if (idx == 0) {
1016 for (const Definition& def : instr->definitions) {
1017 if (!def.isTemp())
1018 continue;
1019 new_demand += def.getTemp();
1020 }
1021 } else {
1022 new_demand.update(ctx.register_demand[block_idx][idx - 1]);
1023 }
1024
1025 assert(!local_next_use_distance.empty());
1026
1027 /* if reg pressure is too high, spill variable with furthest next use */
1028 while (RegisterDemand(new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
1029 unsigned distance = 0;
1030 Temp to_spill;
1031 bool do_rematerialize = false;
1032 if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) {
1033 for (std::pair<Temp, uint32_t> pair : local_next_use_distance[idx]) {
1034 bool can_rematerialize = ctx.remat.count(pair.first);
1035 if (pair.first.type() == RegType::vgpr &&
1036 ((pair.second > distance && can_rematerialize == do_rematerialize) ||
1037 (can_rematerialize && !do_rematerialize && pair.second > idx)) &&
1038 current_spills.find(pair.first) == current_spills.end() &&
1039 ctx.spills_exit[block_idx].find(pair.first) == ctx.spills_exit[block_idx].end()) {
1040 to_spill = pair.first;
1041 distance = pair.second;
1042 do_rematerialize = can_rematerialize;
1043 }
1044 }
1045 } else {
1046 for (std::pair<Temp, uint32_t> pair : local_next_use_distance[idx]) {
1047 bool can_rematerialize = ctx.remat.count(pair.first);
1048 if (pair.first.type() == RegType::sgpr &&
1049 ((pair.second > distance && can_rematerialize == do_rematerialize) ||
1050 (can_rematerialize && !do_rematerialize && pair.second > idx)) &&
1051 current_spills.find(pair.first) == current_spills.end() &&
1052 ctx.spills_exit[block_idx].find(pair.first) == ctx.spills_exit[block_idx].end()) {
1053 to_spill = pair.first;
1054 distance = pair.second;
1055 do_rematerialize = can_rematerialize;
1056 }
1057 }
1058 }
1059
1060 assert(distance != 0 && distance > idx);
1061 uint32_t spill_id = ctx.allocate_spill_id(to_spill.regClass());
1062
1063 /* add interferences with currently spilled variables */
1064 for (std::pair<Temp, uint32_t> pair : current_spills) {
1065 ctx.interferences[spill_id].second.emplace(pair.second);
1066 ctx.interferences[pair.second].second.emplace(spill_id);
1067 }
1068 for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads) {
1069 ctx.interferences[spill_id].second.emplace(pair.second.second);
1070 ctx.interferences[pair.second.second].second.emplace(spill_id);
1071 }
1072
1073 current_spills[to_spill] = spill_id;
1074 spilled_registers += to_spill;
1075
1076 /* rename if necessary */
1077 if (ctx.renames[block_idx].find(to_spill) != ctx.renames[block_idx].end()) {
1078 to_spill = ctx.renames[block_idx][to_spill];
1079 }
1080
1081 /* add spill to new instructions */
1082 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
1083 spill->operands[0] = Operand(to_spill);
1084 spill->operands[1] = Operand(spill_id);
1085 instructions.emplace_back(std::move(spill));
1086 }
1087 }
1088
1089 /* add reloads and instruction to new instructions */
1090 for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads) {
1091 aco_ptr<Instruction> reload = do_reload(ctx, pair.second.first, pair.first, pair.second.second);
1092 instructions.emplace_back(std::move(reload));
1093 }
1094 instructions.emplace_back(std::move(instr));
1095 idx++;
1096 }
1097
1098 block->instructions = std::move(instructions);
1099 ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end());
1100 }
1101
1102 void spill_block(spill_ctx& ctx, unsigned block_idx)
1103 {
1104 Block* block = &ctx.program->blocks[block_idx];
1105 ctx.processed[block_idx] = true;
1106
1107 /* determine set of variables which are spilled at the beginning of the block */
1108 RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
1109
1110 /* add interferences for spilled variables */
1111 for (std::pair<Temp, uint32_t> x : ctx.spills_entry[block_idx]) {
1112 for (std::pair<Temp, uint32_t> y : ctx.spills_entry[block_idx])
1113 if (x.second != y.second)
1114 ctx.interferences[x.second].second.emplace(y.second);
1115 }
1116
1117 bool is_loop_header = block->loop_nest_depth && ctx.loop_header.top()->index == block_idx;
1118 if (!is_loop_header) {
1119 /* add spill/reload code on incoming control flow edges */
1120 add_coupling_code(ctx, block, block_idx);
1121 }
1122
1123 std::map<Temp, uint32_t> current_spills = ctx.spills_entry[block_idx];
1124
1125 /* check conditions to process this block */
1126 bool process = RegisterDemand(block->register_demand - spilled_registers).exceeds(ctx.target_pressure) ||
1127 !ctx.renames[block_idx].empty() ||
1128 ctx.remat_used.size();
1129
1130 std::map<Temp, uint32_t>::iterator it = current_spills.begin();
1131 while (!process && it != current_spills.end()) {
1132 if (ctx.next_use_distances_start[block_idx][it->first].first == block_idx)
1133 process = true;
1134 ++it;
1135 }
1136
1137 if (process)
1138 process_block(ctx, block_idx, block, current_spills, spilled_registers);
1139 else
1140 ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end());
1141
1142 /* check if the next block leaves the current loop */
1143 if (block->loop_nest_depth == 0 || ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth)
1144 return;
1145
1146 Block* loop_header = ctx.loop_header.top();
1147
1148 /* preserve original renames at end of loop header block */
1149 std::map<Temp, Temp> renames = std::move(ctx.renames[loop_header->index]);
1150
1151 /* add coupling code to all loop header predecessors */
1152 add_coupling_code(ctx, loop_header, loop_header->index);
1153
1154 /* update remat_used for phis added in add_coupling_code() */
1155 for (aco_ptr<Instruction>& instr : loop_header->instructions) {
1156 if (!is_phi(instr))
1157 break;
1158 for (const Operand& op : instr->operands) {
1159 if (op.isTemp() && ctx.remat.count(op.getTemp()))
1160 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
1161 }
1162 }
1163
1164 /* propagate new renames through loop: i.e. repair the SSA */
1165 renames.swap(ctx.renames[loop_header->index]);
1166 for (std::pair<Temp, Temp> rename : renames) {
1167 for (unsigned idx = loop_header->index; idx <= block_idx; idx++) {
1168 Block& current = ctx.program->blocks[idx];
1169 std::vector<aco_ptr<Instruction>>::iterator instr_it = current.instructions.begin();
1170
1171 /* first rename phis */
1172 while (instr_it != current.instructions.end()) {
1173 aco_ptr<Instruction>& phi = *instr_it;
1174 if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
1175 break;
1176 /* no need to rename the loop header phis once again. this happened in add_coupling_code() */
1177 if (idx == loop_header->index) {
1178 instr_it++;
1179 continue;
1180 }
1181
1182 for (Operand& op : phi->operands) {
1183 if (!op.isTemp())
1184 continue;
1185 if (op.getTemp() == rename.first)
1186 op.setTemp(rename.second);
1187 }
1188 instr_it++;
1189 }
1190
1191 std::map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = ctx.next_use_distances_start[idx].find(rename.first);
1192
1193 /* variable is not live at beginning of this block */
1194 if (it == ctx.next_use_distances_start[idx].end())
1195 continue;
1196
1197 /* if the variable is live at the block's exit, add rename */
1198 if (ctx.next_use_distances_end[idx].find(rename.first) != ctx.next_use_distances_end[idx].end())
1199 ctx.renames[idx].insert(rename);
1200
1201 /* rename all uses in this block */
1202 bool renamed = false;
1203 while (!renamed && instr_it != current.instructions.end()) {
1204 aco_ptr<Instruction>& instr = *instr_it;
1205 for (Operand& op : instr->operands) {
1206 if (!op.isTemp())
1207 continue;
1208 if (op.getTemp() == rename.first) {
1209 op.setTemp(rename.second);
1210 /* we can stop with this block as soon as the variable is spilled */
1211 if (instr->opcode == aco_opcode::p_spill)
1212 renamed = true;
1213 }
1214 }
1215 instr_it++;
1216 }
1217 }
1218 }
1219
1220 /* remove loop header info from stack */
1221 ctx.loop_header.pop();
1222 }
1223
1224 void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
1225 std::map<uint32_t, uint32_t> sgpr_slot;
1226 std::map<uint32_t, uint32_t> vgpr_slot;
1227 std::vector<bool> is_assigned(ctx.interferences.size());
1228
1229 /* first, handle affinities: just merge all interferences into both spill ids */
1230 for (std::pair<uint32_t, uint32_t> pair : ctx.affinities) {
1231 assert(pair.first != pair.second);
1232 for (uint32_t id : ctx.interferences[pair.first].second)
1233 ctx.interferences[id].second.insert(pair.second);
1234 for (uint32_t id : ctx.interferences[pair.second].second)
1235 ctx.interferences[id].second.insert(pair.first);
1236 ctx.interferences[pair.first].second.insert(ctx.interferences[pair.second].second.begin(), ctx.interferences[pair.second].second.end());
1237 ctx.interferences[pair.second].second.insert(ctx.interferences[pair.first].second.begin(), ctx.interferences[pair.first].second.end());
1238
1239 bool reloaded = ctx.is_reloaded[pair.first] || ctx.is_reloaded[pair.second];
1240 ctx.is_reloaded[pair.first] = ctx.is_reloaded[pair.second] = reloaded;
1241 }
1242 for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++)
1243 for (ASSERTED uint32_t id : ctx.interferences[i].second)
1244 assert(i != id);
1245
1246 /* for each spill slot, assign as many spill ids as possible */
1247 std::vector<std::set<uint32_t>> spill_slot_interferences;
1248 unsigned slot_idx = 0;
1249 bool done = false;
1250
1251 /* assign sgpr spill slots */
1252 while (!done) {
1253 done = true;
1254 for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1255 if (is_assigned[id] || !ctx.is_reloaded[id])
1256 continue;
1257 if (ctx.interferences[id].first.type() != RegType::sgpr)
1258 continue;
1259
1260 /* check interferences */
1261 bool interferes = false;
1262 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
1263 if (i == spill_slot_interferences.size())
1264 spill_slot_interferences.emplace_back(std::set<uint32_t>());
1265 if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / 64 != slot_idx / 64) {
1266 interferes = true;
1267 break;
1268 }
1269 }
1270 if (interferes) {
1271 done = false;
1272 continue;
1273 }
1274
1275 /* we found a spill id which can be assigned to current spill slot */
1276 sgpr_slot[id] = slot_idx;
1277 is_assigned[id] = true;
1278 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
1279 spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
1280 }
1281 slot_idx++;
1282 }
1283
1284 slot_idx = 0;
1285 done = false;
1286
1287 /* assign vgpr spill slots */
1288 while (!done) {
1289 done = true;
1290 for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1291 if (is_assigned[id] || !ctx.is_reloaded[id])
1292 continue;
1293 if (ctx.interferences[id].first.type() != RegType::vgpr)
1294 continue;
1295
1296 /* check interferences */
1297 bool interferes = false;
1298 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
1299 if (i == spill_slot_interferences.size())
1300 spill_slot_interferences.emplace_back(std::set<uint32_t>());
1301 /* check for interference and ensure that vector regs are stored next to each other */
1302 if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / 64 != slot_idx / 64) {
1303 interferes = true;
1304 break;
1305 }
1306 }
1307 if (interferes) {
1308 done = false;
1309 continue;
1310 }
1311
1312 /* we found a spill id which can be assigned to current spill slot */
1313 vgpr_slot[id] = slot_idx;
1314 is_assigned[id] = true;
1315 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
1316 spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
1317 }
1318 slot_idx++;
1319 }
1320
1321 for (unsigned id = 0; id < is_assigned.size(); id++)
1322 assert(is_assigned[id] || !ctx.is_reloaded[id]);
1323
1324 for (std::pair<uint32_t, uint32_t> pair : ctx.affinities) {
1325 assert(is_assigned[pair.first] == is_assigned[pair.second]);
1326 if (!is_assigned[pair.first])
1327 continue;
1328 assert(ctx.is_reloaded[pair.first] == ctx.is_reloaded[pair.second]);
1329 assert(ctx.interferences[pair.first].first.type() == ctx.interferences[pair.second].first.type());
1330 if (ctx.interferences[pair.first].first.type() == RegType::sgpr)
1331 assert(sgpr_slot[pair.first] == sgpr_slot[pair.second]);
1332 else
1333 assert(vgpr_slot[pair.first] == vgpr_slot[pair.second]);
1334 }
1335
1336 /* hope, we didn't mess up */
1337 std::vector<Temp> vgpr_spill_temps((spill_slot_interferences.size() + 63) / 64);
1338 assert(vgpr_spill_temps.size() <= spills_to_vgpr);
1339
1340 /* replace pseudo instructions with actual hardware instructions */
1341 unsigned last_top_level_block_idx = 0;
1342 std::vector<bool> reload_in_loop(vgpr_spill_temps.size());
1343 for (Block& block : ctx.program->blocks) {
1344
1345 /* after loops, we insert a user if there was a reload inside the loop */
1346 if (block.loop_nest_depth == 0) {
1347 int end_vgprs = 0;
1348 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1349 if (reload_in_loop[i])
1350 end_vgprs++;
1351 }
1352
1353 if (end_vgprs > 0) {
1354 aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)};
1355 int k = 0;
1356 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1357 if (reload_in_loop[i])
1358 destr->operands[k++] = Operand(vgpr_spill_temps[i]);
1359 reload_in_loop[i] = false;
1360 }
1361 /* find insertion point */
1362 std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1363 while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
1364 ++it;
1365 block.instructions.insert(it, std::move(destr));
1366 }
1367 }
1368
1369 if (block.kind & block_kind_top_level && !block.linear_preds.empty()) {
1370 last_top_level_block_idx = block.index;
1371
1372 /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
1373 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1374 if (vgpr_spill_temps[i] == Temp())
1375 continue;
1376
1377 bool can_destroy = true;
1378 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[block.linear_preds[0]]) {
1379
1380 if (sgpr_slot.find(pair.second) != sgpr_slot.end() &&
1381 sgpr_slot[pair.second] / 64 == i) {
1382 can_destroy = false;
1383 break;
1384 }
1385 }
1386 if (can_destroy)
1387 vgpr_spill_temps[i] = Temp();
1388 }
1389 }
1390
1391 std::vector<aco_ptr<Instruction>>::iterator it;
1392 std::vector<aco_ptr<Instruction>> instructions;
1393 instructions.reserve(block.instructions.size());
1394 for (it = block.instructions.begin(); it != block.instructions.end(); ++it) {
1395
1396 if ((*it)->opcode == aco_opcode::p_spill) {
1397 uint32_t spill_id = (*it)->operands[1].constantValue();
1398
1399 if (!ctx.is_reloaded[spill_id]) {
1400 /* never reloaded, so don't spill */
1401 } else if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
1402 /* spill vgpr */
1403 ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
1404
1405 assert(false && "vgpr spilling not yet implemented.");
1406 } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
1407 ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
1408
1409 uint32_t spill_slot = sgpr_slot[spill_id];
1410
1411 /* check if the linear vgpr already exists */
1412 if (vgpr_spill_temps[spill_slot / 64] == Temp()) {
1413 Temp linear_vgpr = {ctx.program->allocateId(), v1.as_linear()};
1414 vgpr_spill_temps[spill_slot / 64] = linear_vgpr;
1415 aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1416 create->definitions[0] = Definition(linear_vgpr);
1417 /* find the right place to insert this definition */
1418 if (last_top_level_block_idx == block.index) {
1419 /* insert right before the current instruction */
1420 instructions.emplace_back(std::move(create));
1421 } else {
1422 assert(last_top_level_block_idx < block.index);
1423 /* insert before the branch at last top level block */
1424 std::vector<aco_ptr<Instruction>>& instructions = ctx.program->blocks[last_top_level_block_idx].instructions;
1425 instructions.insert(std::next(instructions.begin(), instructions.size() - 1), std::move(create));
1426 }
1427 }
1428
1429 /* spill sgpr: just add the vgpr temp to operands */
1430 Pseudo_instruction* spill = create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
1431 spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / 64]);
1432 spill->operands[1] = Operand(spill_slot % 64);
1433 spill->operands[2] = (*it)->operands[0];
1434 instructions.emplace_back(aco_ptr<Instruction>(spill));
1435 } else {
1436 unreachable("No spill slot assigned for spill id");
1437 }
1438
1439 } else if ((*it)->opcode == aco_opcode::p_reload) {
1440 uint32_t spill_id = (*it)->operands[0].constantValue();
1441 assert(ctx.is_reloaded[spill_id]);
1442
1443 if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
1444 /* reload vgpr */
1445 assert(false && "vgpr spilling not yet implemented.");
1446
1447 } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
1448 uint32_t spill_slot = sgpr_slot[spill_id];
1449 reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;
1450
1451 /* check if the linear vgpr already exists */
1452 if (vgpr_spill_temps[spill_slot / 64] == Temp()) {
1453 Temp linear_vgpr = {ctx.program->allocateId(), v1.as_linear()};
1454 vgpr_spill_temps[spill_slot / 64] = linear_vgpr;
1455 aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1456 create->definitions[0] = Definition(linear_vgpr);
1457 /* find the right place to insert this definition */
1458 if (last_top_level_block_idx == block.index) {
1459 /* insert right before the current instruction */
1460 instructions.emplace_back(std::move(create));
1461 } else {
1462 assert(last_top_level_block_idx < block.index);
1463 /* insert before the branch at last top level block */
1464 std::vector<aco_ptr<Instruction>>& instructions = ctx.program->blocks[last_top_level_block_idx].instructions;
1465 instructions.insert(std::next(instructions.begin(), instructions.size() - 1), std::move(create));
1466 }
1467 }
1468
1469 /* reload sgpr: just add the vgpr temp to operands */
1470 Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
1471 reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / 64]);
1472 reload->operands[1] = Operand(spill_slot % 64);
1473 reload->definitions[0] = (*it)->definitions[0];
1474 instructions.emplace_back(aco_ptr<Instruction>(reload));
1475 } else {
1476 unreachable("No spill slot assigned for spill id");
1477 }
1478 } else if (!ctx.remat_used.count(it->get()) || ctx.remat_used[it->get()]) {
1479 instructions.emplace_back(std::move(*it));
1480 }
1481
1482 }
1483 block.instructions = std::move(instructions);
1484 }
1485
1486 /* SSA elimination inserts copies for logical phis right before p_logical_end
1487 * So if a linear vgpr is used between that p_logical_end and the branch,
1488 * we need to ensure logical phis don't choose a definition which aliases
1489 * the linear vgpr.
1490 * TODO: Moving the spills and reloads to before p_logical_end might produce
1491 * slightly better code. */
1492 for (Block& block : ctx.program->blocks) {
1493 /* loops exits are already handled */
1494 if (block.logical_preds.size() <= 1)
1495 continue;
1496
1497 bool has_logical_phis = false;
1498 for (aco_ptr<Instruction>& instr : block.instructions) {
1499 if (instr->opcode == aco_opcode::p_phi) {
1500 has_logical_phis = true;
1501 break;
1502 } else if (instr->opcode != aco_opcode::p_linear_phi) {
1503 break;
1504 }
1505 }
1506 if (!has_logical_phis)
1507 continue;
1508
1509 std::set<Temp> vgprs;
1510 for (unsigned pred_idx : block.logical_preds) {
1511 Block& pred = ctx.program->blocks[pred_idx];
1512 for (int i = pred.instructions.size() - 1; i >= 0; i--) {
1513 aco_ptr<Instruction>& pred_instr = pred.instructions[i];
1514 if (pred_instr->opcode == aco_opcode::p_logical_end) {
1515 break;
1516 } else if (pred_instr->opcode == aco_opcode::p_spill ||
1517 pred_instr->opcode == aco_opcode::p_reload) {
1518 vgprs.insert(pred_instr->operands[0].getTemp());
1519 }
1520 }
1521 }
1522 if (!vgprs.size())
1523 continue;
1524
1525 aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)};
1526 int k = 0;
1527 for (Temp tmp : vgprs) {
1528 destr->operands[k++] = Operand(tmp);
1529 }
1530 /* find insertion point */
1531 std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1532 while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
1533 ++it;
1534 block.instructions.insert(it, std::move(destr));
1535 }
1536 }
1537
1538 } /* end namespace */
1539
1540
1541 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options)
1542 {
1543 program->config->spilled_vgprs = 0;
1544 program->config->spilled_sgprs = 0;
1545
1546 /* no spilling when wave count is already high */
1547 if (program->num_waves >= 6)
1548 return;
1549
1550 /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1551 lower_to_cssa(program, live_vars, options);
1552
1553 /* else, we check if we can improve things a bit */
1554
1555 /* calculate target register demand */
1556 RegisterDemand max_reg_demand;
1557 for (Block& block : program->blocks) {
1558 max_reg_demand.update(block.register_demand);
1559 }
1560
1561 RegisterDemand target_pressure = {256, int16_t(program->sgpr_limit)};
1562 unsigned num_waves = 1;
1563 int spills_to_vgpr = (max_reg_demand.sgpr - program->sgpr_limit + 63) / 64;
1564
1565 /* test if it possible to increase occupancy with little spilling */
1566 for (unsigned num_waves_next = 2; num_waves_next <= program->max_waves; num_waves_next++) {
1567 RegisterDemand target_pressure_next = {int16_t((256 / num_waves_next) & ~3),
1568 int16_t(get_addr_sgpr_from_waves(program, num_waves_next))};
1569
1570 /* Currently no vgpr spilling supported.
1571 * Spill as many sgprs as necessary to not hinder occupancy */
1572 if (max_reg_demand.vgpr > target_pressure_next.vgpr)
1573 break;
1574 /* check that we have enough free vgprs to spill sgprs to */
1575 if (max_reg_demand.sgpr > target_pressure_next.sgpr) {
1576 /* add some buffer in case graph coloring is not perfect ... */
1577 const int spills_to_vgpr_next = (max_reg_demand.sgpr - target_pressure_next.sgpr + 63 + 32) / 64;
1578 if (spills_to_vgpr_next + max_reg_demand.vgpr > target_pressure_next.vgpr)
1579 break;
1580 spills_to_vgpr = spills_to_vgpr_next;
1581 }
1582
1583 target_pressure = target_pressure_next;
1584 num_waves = num_waves_next;
1585 }
1586
1587 assert(max_reg_demand.vgpr <= target_pressure.vgpr && "VGPR spilling not yet supported.");
1588 /* nothing to do */
1589 if (num_waves == program->num_waves)
1590 return;
1591
1592 /* initialize ctx */
1593 spill_ctx ctx(target_pressure, program, live_vars.register_demand);
1594 compute_global_next_uses(ctx, live_vars.live_out);
1595 get_rematerialize_info(ctx);
1596
1597 /* create spills and reloads */
1598 for (unsigned i = 0; i < program->blocks.size(); i++)
1599 spill_block(ctx, i);
1600
1601 /* assign spill slots and DCE rematerialized code */
1602 assign_spill_slots(ctx, spills_to_vgpr);
1603
1604 /* update live variable information */
1605 live_vars = live_var_analysis(program, options);
1606
1607 assert(program->num_waves >= num_waves);
1608 }
1609
1610 }
1611