/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/* Emits an "ACO performance warning: <msg>" line on stderr, followed by an
 * "instruction: " line containing the text aco_print_instr() produces for
 * `instr`, and finally tests the DEBUG_PERFWARN bit of debug_flags.
 *
 * NOTE(review): this listing is a lossy extraction. The lines that consume
 * `cond`, any guard around the instruction print, and the statement controlled
 * by the DEBUG_PERFWARN test are not visible here - confirm against the full
 * file before relying on this description. */
33 void perfwarn(bool cond
, const char *msg
, Instruction
*instr
)
36 fprintf(stderr
, "ACO performance warning: %s\n", msg
);
38 fprintf(stderr
, "instruction: ");
39 aco_print_instr(instr
, stderr
);
40 fprintf(stderr
, "\n");
43 if (debug_flags
& DEBUG_PERFWARN
)
/* Sanity-checks the IR held by `program`.
 *
 * Pass 1 walks every instruction of every block and checks it against a set
 * of rules: base format vs. the opcode's format in instr_info, VOP3/SDWA
 * modifiers, opsel, undefined operands, subdword definitions, literal usage,
 * constant-bus usage of VALU instructions, SALU register types and a number
 * of format-specific operand/definition rules.
 * Pass 2 walks the blocks and checks the block index, the ordering of the
 * predecessor/successor lists and the absence of critical edges.
 *
 * Every rule is reported through the `check` / `check_block` helpers declared
 * below, which print the message together with the offending instruction or
 * block index to `output`.
 *
 * NOTE(review): this listing is a lossy extraction - original line numbers
 * are fused into the text and several lines (braces, case labels, some
 * statements) are not visible. In particular the statement controlled by the
 * DEBUG_VALIDATE test and the final use of is_valid cannot be seen here. */
49 void validate(Program
* program
, FILE * output
)
51 if (!(debug_flags
& DEBUG_VALIDATE
))
/* Reporting helper for instruction rules: prints "<msg>: ", the instruction
 * (via aco_print_instr) and a newline to `output`. `output` and `is_valid`
 * are captured by reference.
 * NOTE(review): the test of the `check` argument and the update of is_valid
 * are not visible in this listing. */
55 auto check
= [&output
, &is_valid
](bool check
, const char * msg
, aco::Instruction
* instr
) -> void {
57 fprintf(output
, "%s: ", msg
);
58 aco_print_instr(instr
, output
);
59 fprintf(output
, "\n");
/* Reporting helper for block rules: prints "<msg>: BB<index>" to `output`. */
63 auto check_block
= [&output
, &is_valid
](bool check
, const char * msg
, aco::Block
* block
) -> void {
65 fprintf(output
, "%s: BB%u\n", msg
, block
->index
);
/* pass 1: per-instruction checks */
70 for (Block
& block
: program
->blocks
) {
71 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
73 /* check base format */
74 Format base_format
= instr
->format
;
75 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::SDWA
);
76 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::DPP
);
77 if ((uint32_t)base_format
& (uint32_t)Format::VOP1
)
78 base_format
= Format::VOP1
;
79 else if ((uint32_t)base_format
& (uint32_t)Format::VOP2
)
80 base_format
= Format::VOP2
;
81 else if ((uint32_t)base_format
& (uint32_t)Format::VOPC
)
82 base_format
= Format::VOPC
;
83 else if ((uint32_t)base_format
& (uint32_t)Format::VINTRP
) {
84 if (instr
->opcode
== aco_opcode::v_interp_p1ll_f16
||
85 instr
->opcode
== aco_opcode::v_interp_p1lv_f16
||
86 instr
->opcode
== aco_opcode::v_interp_p2_legacy_f16
||
87 instr
->opcode
== aco_opcode::v_interp_p2_f16
) {
88 /* v_interp_*_fp16 are considered VINTRP by the compiler but
89 * they are emitted as VOP3.
91 base_format
= Format::VOP3
;
93 base_format
= Format::VINTRP
;
96 check(base_format
== instr_info
.format
[(int)instr
->opcode
], "Wrong base format for instruction", instr
.get());
98 /* check VOP3 modifiers */
99 if (((uint32_t)instr
->format
& (uint32_t)Format::VOP3
) && instr
->format
!= Format::VOP3
) {
100 check(base_format
== Format::VOP2
||
101 base_format
== Format::VOP1
||
102 base_format
== Format::VOPC
||
103 base_format
== Format::VINTRP
,
104 "Format cannot have VOP3A/VOP3B applied", instr
.get());
/* check SDWA constraints */
108 if (instr
->isSDWA()) {
109 check(base_format
== Format::VOP2
||
110 base_format
== Format::VOP1
||
111 base_format
== Format::VOPC
,
112 "Format cannot have SDWA applied", instr
.get());
114 check(program
->chip_class
>= GFX8
, "SDWA is GFX8+ only", instr
.get());
116 SDWA_instruction
*sdwa
= static_cast<SDWA_instruction
*>(instr
.get());
117 check(sdwa
->omod
== 0 || program
->chip_class
>= GFX9
, "SDWA omod only supported on GFX9+", instr
.get());
118 if (base_format
== Format::VOPC
) {
119 check(sdwa
->clamp
== false || program
->chip_class
== GFX8
, "SDWA VOPC clamp only supported on GFX8", instr
.get());
120 check((instr
->definitions
[0].isFixed() && instr
->definitions
[0].physReg() == vcc
) ||
121 program
->chip_class
>= GFX9
,
122 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr
.get());
125 if (instr
->operands
.size() >= 3) {
126 check(instr
->operands
[2].isFixed() && instr
->operands
[2].physReg() == vcc
,
127 "3rd operand must be fixed to vcc with SDWA", instr
.get());
129 if (instr
->definitions
.size() >= 2) {
130 check(instr
->definitions
[1].isFixed() && instr
->definitions
[1].physReg() == vcc
,
131 "2nd definition must be fixed to vcc with SDWA", instr
.get());
134 check(instr
->opcode
!= aco_opcode::v_madmk_f32
&&
135 instr
->opcode
!= aco_opcode::v_madak_f32
&&
136 instr
->opcode
!= aco_opcode::v_madmk_f16
&&
137 instr
->opcode
!= aco_opcode::v_madak_f16
&&
138 instr
->opcode
!= aco_opcode::v_readfirstlane_b32
&&
139 instr
->opcode
!= aco_opcode::v_clrexcp
&&
140 instr
->opcode
!= aco_opcode::v_swap_b32
,
141 "SDWA can't be used with this opcode", instr
.get());
142 if (program
->chip_class
!= GFX8
) {
143 check(instr
->opcode
!= aco_opcode::v_mac_f32
&&
144 instr
->opcode
!= aco_opcode::v_mac_f16
&&
145 instr
->opcode
!= aco_opcode::v_fmac_f32
&&
146 instr
->opcode
!= aco_opcode::v_fmac_f16
,
147 "SDWA can't be used with this opcode", instr
.get());
/* check opsel usage of VOP3 instructions */
152 if (instr
->isVOP3()) {
153 VOP3A_instruction
*vop3
= static_cast<VOP3A_instruction
*>(instr
.get());
154 check(vop3
->opsel
== 0 || program
->chip_class
>= GFX9
, "Opsel is only supported on GFX9+", instr
.get());
155 check((vop3
->opsel
& ~(0x10 | ((1 << instr
->operands
.size()) - 1))) == 0, "Unused bits in opsel must be zeroed out", instr
.get());
158 /* check for undefs */
159 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
160 if (instr
->operands
[i
].isUndefined()) {
161 bool flat
= instr
->format
== Format::FLAT
|| instr
->format
== Format::SCRATCH
|| instr
->format
== Format::GLOBAL
;
162 bool can_be_undef
= is_phi(instr
) || instr
->format
== Format::EXP
||
163 instr
->format
== Format::PSEUDO_REDUCTION
||
164 instr
->opcode
== aco_opcode::p_create_vector
||
165 (flat
&& i
== 1) || (instr
->format
== Format::MIMG
&& i
== 1) ||
166 ((instr
->format
== Format::MUBUF
|| instr
->format
== Format::MTBUF
) && i
== 1);
167 check(can_be_undef
, "Undefs can only be used in certain operands", instr
.get());
169 check(instr
->operands
[i
].isFixed() || instr
->operands
[i
].isTemp() || instr
->operands
[i
].isConstant(), "Uninitialized Operand", instr
.get());
173 /* check subdword definitions */
174 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
175 if (instr
->definitions
[i
].regClass().is_subdword())
176 check(instr
->format
== Format::PSEUDO
|| instr
->definitions
[i
].bytes() <= 4, "Only Pseudo instructions can write subdword registers larger than 4 bytes", instr
.get());
/* checks on the operands of SALU/VALU instructions (literal usage first) */
179 if (instr
->isSALU() || instr
->isVALU()) {
182 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
184 Operand op
= instr
->operands
[i
];
188 check(instr
->format
== Format::SOP1
||
189 instr
->format
== Format::SOP2
||
190 instr
->format
== Format::SOPC
||
191 instr
->format
== Format::VOP1
||
192 instr
->format
== Format::VOP2
||
193 instr
->format
== Format::VOPC
||
194 (instr
->isVOP3() && program
->chip_class
>= GFX10
),
195 "Literal applied on wrong instruction format", instr
.get());
197 check(literal
.isUndefined() || (literal
.size() == op
.size() && literal
.constantValue() == op
.constantValue()), "Only 1 Literal allowed", instr
.get());
199 check(!instr
->isVALU() || instr
->isVOP3() || i
== 0 || i
== 2, "Wrong source position for Literal argument", instr
.get());
202 /* check num sgprs for VALU */
203 if (instr
->isVALU()) {
204 bool is_shift64
= instr
->opcode
== aco_opcode::v_lshlrev_b64
||
205 instr
->opcode
== aco_opcode::v_lshrrev_b64
||
206 instr
->opcode
== aco_opcode::v_ashrrev_i64
;
207 unsigned const_bus_limit
= 1;
208 if (program
->chip_class
>= GFX10
&& !is_shift64
)
211 uint32_t scalar_mask
= instr
->isVOP3() ? 0x7 : 0x5;
213 scalar_mask
= program
->chip_class
>= GFX9
? 0x7 : 0x4;
215 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
||
216 (int) instr
->format
& (int) Format::VOPC
||
217 instr
->opcode
== aco_opcode::v_readfirstlane_b32
||
218 instr
->opcode
== aco_opcode::v_readlane_b32
||
219 instr
->opcode
== aco_opcode::v_readlane_b32_e64
,
220 "Wrong Definition type for VALU instruction", instr
.get());
221 unsigned num_sgprs
= 0;
222 unsigned sgpr
[] = {0, 0};
223 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
225 Operand op
= instr
->operands
[i
];
226 if (instr
->opcode
== aco_opcode::v_readfirstlane_b32
||
227 instr
->opcode
== aco_opcode::v_readlane_b32
||
228 instr
->opcode
== aco_opcode::v_readlane_b32_e64
||
229 instr
->opcode
== aco_opcode::v_writelane_b32
||
230 instr
->opcode
== aco_opcode::v_writelane_b32_e64
) {
231 check(!op
.isLiteral(), "No literal allowed on VALU instruction", instr
.get());
232 check(i
== 1 || (op
.isTemp() && op
.regClass().type() == RegType::vgpr
&& op
.bytes() <= 4), "Wrong Operand type for VALU instruction", instr
.get());
235 if (op
.isTemp() && instr
->operands
[i
].regClass().type() == RegType::sgpr
) {
236 check(scalar_mask
& (1 << i
), "Wrong source position for SGPR argument", instr
.get());
238 if (op
.tempId() != sgpr
[0] && op
.tempId() != sgpr
[1]) {
240 sgpr
[num_sgprs
++] = op
.tempId();
244 if (op
.isConstant() && !op
.isLiteral())
245 check(scalar_mask
& (1 << i
), "Wrong source position for constant argument", instr
.get());
247 check(num_sgprs
+ (literal
.isUndefined() ? 0 : 1) <= const_bus_limit
, "Too many SGPRs/literals", instr
.get());
/* check definition/operand register types of SOP1/SOP2 instructions */
250 if (instr
->format
== Format::SOP1
|| instr
->format
== Format::SOP2
) {
251 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "Wrong Definition type for SALU instruction", instr
.get());
252 for (const Operand
& op
: instr
->operands
) {
253 check(op
.isConstant() || op
.regClass().type() <= RegType::sgpr
,
254 "Wrong Operand type for SALU instruction", instr
.get());
/* format-specific checks; NOTE(review): several case labels are not visible
 * in this listing */
259 switch (instr
->format
) {
260 case Format::PSEUDO
: {
261 if (instr
->opcode
== aco_opcode::p_create_vector
) {
263 for (const Operand
& op
: instr
->operands
) {
266 check(size
== instr
->definitions
[0].bytes(), "Definition size does not match operand sizes", instr
.get());
267 if (instr
->definitions
[0].getTemp().type() == RegType::sgpr
) {
268 for (const Operand
& op
: instr
->operands
) {
269 check(op
.isConstant() || op
.regClass().type() == RegType::sgpr
,
270 "Wrong Operand type for scalar vector", instr
.get());
273 } else if (instr
->opcode
== aco_opcode::p_extract_vector
) {
274 check((instr
->operands
[0].isTemp()) && instr
->operands
[1].isConstant(), "Wrong Operand types", instr
.get());
275 check((instr
->operands
[1].constantValue() + 1) * instr
->definitions
[0].bytes() <= instr
->operands
[0].bytes(), "Index out of range", instr
.get());
276 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
|| instr
->operands
[0].regClass().type() == RegType::sgpr
,
277 "Cannot extract SGPR value from VGPR vector", instr
.get());
278 } else if (instr
->opcode
== aco_opcode::p_parallelcopy
) {
279 check(instr
->definitions
.size() == instr
->operands
.size(), "Number of Operands does not match number of Definitions", instr
.get());
280 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
281 if (instr
->operands
[i
].isTemp())
282 check((instr
->definitions
[i
].getTemp().type() == instr
->operands
[i
].regClass().type()) ||
283 (instr
->definitions
[i
].getTemp().type() == RegType::vgpr
&& instr
->operands
[i
].regClass().type() == RegType::sgpr
),
284 "Operand and Definition types do not match", instr
.get());
286 } else if (instr
->opcode
== aco_opcode::p_phi
) {
287 check(instr
->operands
.size() == block
.logical_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
288 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
|| instr
->definitions
[0].getTemp().regClass() == program
->lane_mask
, "Logical Phi Definition must be vgpr or divergent boolean", instr
.get());
289 } else if (instr
->opcode
== aco_opcode::p_linear_phi
) {
290 for (const Operand
& op
: instr
->operands
)
291 check(!op
.isTemp() || op
.getTemp().is_linear(), "Wrong Operand type", instr
.get());
292 check(instr
->operands
.size() == block
.linear_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
297 if (instr
->operands
.size() >= 1)
298 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::sgpr
, "SMEM operands must be sgpr", instr
.get());
299 if (instr
->operands
.size() >= 2)
300 check(instr
->operands
[1].isConstant() || (instr
->operands
[1].isTemp() && instr
->operands
[1].regClass().type() == RegType::sgpr
),
301 "SMEM offset must be constant or sgpr", instr
.get());
302 if (!instr
->definitions
.empty())
303 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "SMEM result must be sgpr", instr
.get());
307 case Format::MUBUF
: {
308 check(instr
->operands
.size() > 1, "VMEM instructions must have at least one operand", instr
.get());
309 check(instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::vgpr
,
310 "VADDR must be in vgpr for VMEM instructions", instr
.get());
311 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::sgpr
, "VMEM resource constant must be sgpr", instr
.get());
312 check(instr
->operands
.size() < 4 || (instr
->operands
[3].isTemp() && instr
->operands
[3].regClass().type() == RegType::vgpr
), "VMEM write data must be vgpr", instr
.get());
316 check(instr
->operands
.size() == 3, "MIMG instructions must have exactly 3 operands", instr
.get());
317 check(instr
->operands
[0].hasRegClass() && (instr
->operands
[0].regClass() == s4
|| instr
->operands
[0].regClass() == s8
),
318 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr
.get());
319 if (instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::sgpr
)
320 check(instr
->operands
[1].regClass() == s4
, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr
.get());
321 else if (instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::vgpr
)
322 check((instr
->definitions
.empty() || instr
->definitions
[0].regClass() == instr
->operands
[1].regClass() ||
323 instr
->opcode
== aco_opcode::image_atomic_cmpswap
|| instr
->opcode
== aco_opcode::image_atomic_fcmpswap
),
324 "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr
.get());
325 check(instr
->operands
[2].hasRegClass() && instr
->operands
[2].regClass().type() == RegType::vgpr
,
326 "MIMG operands[2] (VADDR) must be VGPR", instr
.get());
327 check(instr
->definitions
.empty() || (instr
->definitions
[0].isTemp() && instr
->definitions
[0].regClass().type() == RegType::vgpr
),
328 "MIMG definitions[0] (VDATA) must be VGPR", instr
.get());
332 for (const Operand
& op
: instr
->operands
) {
333 check((op
.isTemp() && op
.regClass().type() == RegType::vgpr
) || op
.physReg() == m0
,
334 "Only VGPRs are valid DS instruction operands", instr
.get());
336 if (!instr
->definitions
.empty())
337 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "DS instruction must return VGPR", instr
.get());
341 for (unsigned i
= 0; i
< 4; i
++)
342 check(instr
->operands
[i
].hasRegClass() && instr
->operands
[i
].regClass().type() == RegType::vgpr
,
343 "Only VGPRs are valid Export arguments", instr
.get());
347 check(instr
->operands
[1].isUndefined(), "Flat instructions don't support SADDR", instr
.get());
350 case Format::SCRATCH
: {
351 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH address must be vgpr", instr
.get());
352 check(instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::sgpr
,
353 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr
.get());
354 if (!instr
->definitions
.empty())
355 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH result must be vgpr", instr
.get());
357 check(instr
->operands
[2].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH data must be vgpr", instr
.get());
/* pass 2: per-block control-flow graph checks */
367 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++) {
368 Block
& block
= program
->blocks
[i
];
369 check_block(block
.index
== i
, "block.index must match actual index", &block
);
371 /* predecessors/successors should be sorted */
372 for (unsigned j
= 0; j
+ 1 < block
.linear_preds
.size(); j
++)
373 check_block(block
.linear_preds
[j
] < block
.linear_preds
[j
+ 1], "linear predecessors must be sorted", &block
);
374 for (unsigned j
= 0; j
+ 1 < block
.logical_preds
.size(); j
++)
375 check_block(block
.logical_preds
[j
] < block
.logical_preds
[j
+ 1], "logical predecessors must be sorted", &block
);
376 for (unsigned j
= 0; j
+ 1 < block
.linear_succs
.size(); j
++)
377 check_block(block
.linear_succs
[j
] < block
.linear_succs
[j
+ 1], "linear successors must be sorted", &block
);
378 for (unsigned j
= 0; j
+ 1 < block
.logical_succs
.size(); j
++)
379 check_block(block
.logical_succs
[j
] < block
.logical_succs
[j
+ 1], "logical successors must be sorted", &block
);
381 /* critical edges are not allowed */
382 if (block
.linear_preds
.size() > 1) {
383 for (unsigned pred
: block
.linear_preds
)
384 check_block(program
->blocks
[pred
].linear_succs
.size() == 1, "linear critical edges are not allowed", &program
->blocks
[pred
]);
385 for (unsigned pred
: block
.logical_preds
)
386 check_block(program
->blocks
[pred
].logical_succs
.size() == 1, "logical critical edges are not allowed", &program
->blocks
[pred
]);
/* Default constructor: yields a Location that refers to no block and no
 * instruction (both members are NULL).
 * NOTE(review): the enclosing struct header and the declaration of `block`
 * are not visible in this listing. */
397 Location() : block(NULL
), instr(NULL
) {}
400 Instruction
*instr
; //NULL if it's the block's live-in
409 bool ra_fail(FILE *output
, Location loc
, Location loc2
, const char *fmt
, ...) {
413 vsprintf(msg
, fmt
, args
);
416 fprintf(stderr
, "RA error found at instruction in BB%d:\n", loc
.block
->index
);
418 aco_print_instr(loc
.instr
, stderr
);
419 fprintf(stderr
, "\n%s", msg
);
421 fprintf(stderr
, "%s", msg
);
424 fprintf(stderr
, " in BB%d:\n", loc2
.block
->index
);
425 aco_print_instr(loc2
.instr
, stderr
);
427 fprintf(stderr
, "\n\n");
/* Tells whether `instr` may address sub-dword parts of a register: the
 * visible return statement yields true for SDWA instructions and for
 * instructions whose format is Format::PSEUDO.
 * NOTE(review): the statement taken when program->chip_class < GFX8 is not
 * visible in this listing; presumably that case yields false - confirm
 * against the full file. */
432 bool instr_can_access_subdword(Program
* program
, aco_ptr
<Instruction
>& instr
)
434 if (program
->chip_class
< GFX8
)
436 return instr
->isSDWA() || instr
->format
== Format::PSEUDO
;
439 } /* end namespace */
441 bool validate_ra(Program
*program
, const struct radv_nir_compiler_options
*options
, FILE *output
) {
442 if (!(debug_flags
& DEBUG_VALIDATE_RA
))
446 aco::live live_vars
= aco::live_var_analysis(program
, options
);
447 std::vector
<std::vector
<Temp
>> phi_sgpr_ops(program
->blocks
.size());
449 std::map
<unsigned, Assignment
> assignments
;
450 for (Block
& block
: program
->blocks
) {
453 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
454 if (instr
->opcode
== aco_opcode::p_phi
) {
455 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
456 if (instr
->operands
[i
].isTemp() &&
457 instr
->operands
[i
].getTemp().type() == RegType::sgpr
&&
458 instr
->operands
[i
].isFirstKill())
459 phi_sgpr_ops
[block
.logical_preds
[i
]].emplace_back(instr
->operands
[i
].getTemp());
463 loc
.instr
= instr
.get();
464 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
465 Operand
& op
= instr
->operands
[i
];
469 err
|= ra_fail(output
, loc
, Location(), "Operand %d is not assigned a register", i
);
470 if (assignments
.count(op
.tempId()) && assignments
[op
.tempId()].reg
!= op
.physReg())
471 err
|= ra_fail(output
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an inconsistent register assignment with instruction", i
);
472 if ((op
.getTemp().type() == RegType::vgpr
&& op
.physReg().reg_b
+ op
.bytes() > (256 + program
->config
->num_vgprs
) * 4) ||
473 (op
.getTemp().type() == RegType::sgpr
&& op
.physReg() + op
.size() > program
->config
->num_sgprs
&& op
.physReg() < program
->sgpr_limit
))
474 err
|= ra_fail(output
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an out-of-bounds register assignment", i
);
475 if (op
.physReg() == vcc
&& !program
->needs_vcc
)
476 err
|= ra_fail(output
, loc
, Location(), "Operand %d fixed to vcc but needs_vcc=false", i
);
477 if (!instr_can_access_subdword(program
, instr
) && op
.regClass().is_subdword() && op
.physReg().byte())
478 err
|= ra_fail(output
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d must be aligned to a full register", i
);
479 if (!assignments
[op
.tempId()].firstloc
.block
)
480 assignments
[op
.tempId()].firstloc
= loc
;
481 if (!assignments
[op
.tempId()].defloc
.block
)
482 assignments
[op
.tempId()].reg
= op
.physReg();
485 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
486 Definition
& def
= instr
->definitions
[i
];
490 err
|= ra_fail(output
, loc
, Location(), "Definition %d is not assigned a register", i
);
491 if (assignments
[def
.tempId()].defloc
.block
)
492 err
|= ra_fail(output
, loc
, assignments
.at(def
.tempId()).defloc
, "Temporary %%%d also defined by instruction", def
.tempId());
493 if ((def
.getTemp().type() == RegType::vgpr
&& def
.physReg().reg_b
+ def
.bytes() > (256 + program
->config
->num_vgprs
) * 4) ||
494 (def
.getTemp().type() == RegType::sgpr
&& def
.physReg() + def
.size() > program
->config
->num_sgprs
&& def
.physReg() < program
->sgpr_limit
))
495 err
|= ra_fail(output
, loc
, assignments
.at(def
.tempId()).firstloc
, "Definition %d has an out-of-bounds register assignment", i
);
496 if (def
.physReg() == vcc
&& !program
->needs_vcc
)
497 err
|= ra_fail(output
, loc
, Location(), "Definition %d fixed to vcc but needs_vcc=false", i
);
498 if (!instr_can_access_subdword(program
, instr
) && def
.regClass().is_subdword() && def
.physReg().byte())
499 err
|= ra_fail(output
, loc
, assignments
.at(def
.tempId()).firstloc
, "Definition %d must be aligned to a full register", i
);
500 if (!assignments
[def
.tempId()].firstloc
.block
)
501 assignments
[def
.tempId()].firstloc
= loc
;
502 assignments
[def
.tempId()].defloc
= loc
;
503 assignments
[def
.tempId()].reg
= def
.physReg();
508 for (Block
& block
: program
->blocks
) {
512 std::array
<unsigned, 2048> regs
; /* register file in bytes */
516 live
.insert(live_vars
.live_out
[block
.index
].begin(), live_vars
.live_out
[block
.index
].end());
517 /* remove killed p_phi sgpr operands */
518 for (Temp tmp
: phi_sgpr_ops
[block
.index
])
522 for (Temp tmp
: live
) {
523 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
524 for (unsigned i
= 0; i
< tmp
.bytes(); i
++) {
525 if (regs
[reg
.reg_b
+ i
]) {
526 err
|= ra_fail(output
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
.reg_b
+ i
]);
528 regs
[reg
.reg_b
+ i
] = tmp
.id();
533 for (auto it
= block
.instructions
.rbegin(); it
!= block
.instructions
.rend(); ++it
) {
534 aco_ptr
<Instruction
>& instr
= *it
;
536 /* check killed p_phi sgpr operands */
537 if (instr
->opcode
== aco_opcode::p_logical_end
) {
538 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
539 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
540 for (unsigned i
= 0; i
< tmp
.bytes(); i
++) {
541 if (regs
[reg
.reg_b
+ i
])
542 err
|= ra_fail(output
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
.reg_b
+ i
]);
548 for (const Definition
& def
: instr
->definitions
) {
551 live
.erase(def
.getTemp());
554 /* don't count phi operands as live-in, since they are actually
555 * killed when they are copied at the predecessor */
556 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
557 for (const Operand
& op
: instr
->operands
) {
560 live
.insert(op
.getTemp());
565 for (Temp tmp
: live
) {
566 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
567 for (unsigned i
= 0; i
< tmp
.bytes(); i
++)
568 regs
[reg
.reg_b
+ i
] = tmp
.id();
571 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
572 loc
.instr
= instr
.get();
574 /* remove killed p_phi operands from regs */
575 if (instr
->opcode
== aco_opcode::p_logical_end
) {
576 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
577 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
578 for (unsigned i
= 0; i
< tmp
.bytes(); i
++)
579 regs
[reg
.reg_b
+ i
] = 0;
583 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
584 for (const Operand
& op
: instr
->operands
) {
587 if (op
.isFirstKillBeforeDef()) {
588 for (unsigned j
= 0; j
< op
.getTemp().bytes(); j
++)
589 regs
[op
.physReg().reg_b
+ j
] = 0;
594 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
595 Definition
& def
= instr
->definitions
[i
];
598 Temp tmp
= def
.getTemp();
599 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
600 for (unsigned j
= 0; j
< tmp
.bytes(); j
++) {
601 if (regs
[reg
.reg_b
+ j
])
602 err
|= ra_fail(output
, loc
, assignments
.at(regs
[reg
.reg_b
+ j
]).defloc
, "Assignment of element %d of %%%d already taken by %%%d from instruction", i
, tmp
.id(), regs
[reg
.reg_b
+ j
]);
603 regs
[reg
.reg_b
+ j
] = tmp
.id();
605 if (def
.regClass().is_subdword() && !instr_can_access_subdword(program
, instr
)) {
606 for (unsigned j
= tmp
.bytes(); j
< 4; j
++)
607 if (regs
[reg
.reg_b
+ j
])
608 err
|= ra_fail(output
, loc
, assignments
.at(regs
[reg
.reg_b
+ j
]).defloc
, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i
, tmp
.id(), regs
[reg
.reg_b
+ j
]);
612 for (const Definition
& def
: instr
->definitions
) {
616 for (unsigned j
= 0; j
< def
.getTemp().bytes(); j
++)
617 regs
[def
.physReg().reg_b
+ j
] = 0;
621 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
622 for (const Operand
& op
: instr
->operands
) {
625 if (op
.isLateKill() && op
.isFirstKill()) {
626 for (unsigned j
= 0; j
< op
.getTemp().bytes(); j
++)
627 regs
[op
.physReg().reg_b
+ j
] = 0;