2 * Copyright © 2018 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 static void aco_log(Program
*program
, enum radv_compiler_debug_level level
,
33 const char *prefix
, const char *file
, unsigned line
,
34 const char *fmt
, va_list args
)
38 msg
= ralloc_strdup(NULL
, prefix
);
40 ralloc_asprintf_append(&msg
, " In file %s:%u\n", file
, line
);
41 ralloc_asprintf_append(&msg
, " ");
42 ralloc_vasprintf_append(&msg
, fmt
, args
);
44 if (program
->debug
.func
)
45 program
->debug
.func(program
->debug
.private_data
, level
, msg
);
47 fprintf(stderr
, "%s\n", msg
);
52 void _aco_perfwarn(Program
*program
, const char *file
, unsigned line
,
58 aco_log(program
, RADV_COMPILER_DEBUG_LEVEL_PERFWARN
,
59 "ACO PERFWARN:\n", file
, line
, fmt
, args
);
63 void _aco_err(Program
*program
, const char *file
, unsigned line
,
69 aco_log(program
, RADV_COMPILER_DEBUG_LEVEL_ERROR
,
70 "ACO ERROR:\n", file
, line
, fmt
, args
);
74 bool validate_ir(Program
* program
)
77 auto check
= [&program
, &is_valid
](bool check
, const char * msg
, aco::Instruction
* instr
) -> void {
81 FILE *memf
= open_memstream(&out
, &outsize
);
83 fprintf(memf
, "%s: ", msg
);
84 aco_print_instr(instr
, memf
);
87 aco_err(program
, out
);
94 auto check_block
= [&program
, &is_valid
](bool check
, const char * msg
, aco::Block
* block
) -> void {
96 aco_err(program
, "%s: BB%u", msg
, block
->index
);
/* Per-instruction validation: every instruction of every block is run
 * through a series of check() calls. Each call pairs a condition with the
 * message that is reported, together with the offending instruction, when
 * the condition does not hold. The checks cover the base format, VOP3/SDWA
 * modifiers, opsel, undefined operands, sub-dword definitions, literals,
 * constant-bus usage of VALU instructions, SALU operand types and a set of
 * format-specific operand/definition rules. */
101 for (Block
& block
: program
->blocks
) {
102 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
104 /* check base format */
105 Format base_format
= instr
->format
;
106 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::SDWA
);
107 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::DPP
);
108 if ((uint32_t)base_format
& (uint32_t)Format::VOP1
)
109 base_format
= Format::VOP1
;
110 else if ((uint32_t)base_format
& (uint32_t)Format::VOP2
)
111 base_format
= Format::VOP2
;
112 else if ((uint32_t)base_format
& (uint32_t)Format::VOPC
)
113 base_format
= Format::VOPC
;
114 else if ((uint32_t)base_format
& (uint32_t)Format::VINTRP
) {
115 if (instr
->opcode
== aco_opcode::v_interp_p1ll_f16
||
116 instr
->opcode
== aco_opcode::v_interp_p1lv_f16
||
117 instr
->opcode
== aco_opcode::v_interp_p2_legacy_f16
||
118 instr
->opcode
== aco_opcode::v_interp_p2_f16
) {
119 /* v_interp_*_fp16 are considered VINTRP by the compiler but
120 * they are emitted as VOP3.
122 base_format
= Format::VOP3
;
124 base_format
= Format::VINTRP
;
127 check(base_format
== instr_info
.format
[(int)instr
->opcode
], "Wrong base format for instruction", instr
.get());
129 /* check VOP3 modifiers */
130 if (((uint32_t)instr
->format
& (uint32_t)Format::VOP3
) && instr
->format
!= Format::VOP3
) {
131 check(base_format
== Format::VOP2
||
132 base_format
== Format::VOP1
||
133 base_format
== Format::VOPC
||
134 base_format
== Format::VINTRP
,
135 "Format cannot have VOP3A/VOP3B applied", instr
.get());
/* check SDWA restrictions */
139 if (instr
->isSDWA()) {
140 check(base_format
== Format::VOP2
||
141 base_format
== Format::VOP1
||
142 base_format
== Format::VOPC
,
143 "Format cannot have SDWA applied", instr
.get());
145 check(program
->chip_class
>= GFX8
, "SDWA is GFX8+ only", instr
.get());
147 SDWA_instruction
*sdwa
= static_cast<SDWA_instruction
*>(instr
.get());
148 check(sdwa
->omod
== 0 || program
->chip_class
>= GFX9
, "SDWA omod only supported on GFX9+", instr
.get());
149 if (base_format
== Format::VOPC
) {
150 check(sdwa
->clamp
== false || program
->chip_class
== GFX8
, "SDWA VOPC clamp only supported on GFX8", instr
.get());
151 check((instr
->definitions
[0].isFixed() && instr
->definitions
[0].physReg() == vcc
) ||
152 program
->chip_class
>= GFX9
,
153 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr
.get());
156 if (instr
->operands
.size() >= 3) {
157 check(instr
->operands
[2].isFixed() && instr
->operands
[2].physReg() == vcc
,
158 "3rd operand must be fixed to vcc with SDWA", instr
.get());
160 if (instr
->definitions
.size() >= 2) {
161 check(instr
->definitions
[1].isFixed() && instr
->definitions
[1].physReg() == vcc
,
162 "2nd definition must be fixed to vcc with SDWA", instr
.get());
165 check(instr
->opcode
!= aco_opcode::v_madmk_f32
&&
166 instr
->opcode
!= aco_opcode::v_madak_f32
&&
167 instr
->opcode
!= aco_opcode::v_madmk_f16
&&
168 instr
->opcode
!= aco_opcode::v_madak_f16
&&
169 instr
->opcode
!= aco_opcode::v_readfirstlane_b32
&&
170 instr
->opcode
!= aco_opcode::v_clrexcp
&&
171 instr
->opcode
!= aco_opcode::v_swap_b32
,
172 "SDWA can't be used with this opcode", instr
.get());
173 if (program
->chip_class
!= GFX8
) {
174 check(instr
->opcode
!= aco_opcode::v_mac_f32
&&
175 instr
->opcode
!= aco_opcode::v_mac_f16
&&
176 instr
->opcode
!= aco_opcode::v_fmac_f32
&&
177 instr
->opcode
!= aco_opcode::v_fmac_f16
,
178 "SDWA can't be used with this opcode", instr
.get());
181 for (unsigned i
= 0; i
< MIN2(instr
->operands
.size(), 2); i
++) {
182 if (instr
->operands
[i
].hasRegClass() && instr
->operands
[i
].regClass().is_subdword())
183 check((sdwa
->sel
[i
] & sdwa_asuint
) == (sdwa_isra
| instr
->operands
[i
].bytes()), "Unexpected SDWA sel for sub-dword operand", instr
.get());
185 if (instr
->definitions
[0].regClass().is_subdword())
186 check((sdwa
->dst_sel
& sdwa_asuint
) == (sdwa_isra
| instr
->definitions
[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr
.get());
/* check opsel */
190 if (instr
->isVOP3()) {
191 VOP3A_instruction
*vop3
= static_cast<VOP3A_instruction
*>(instr
.get());
192 check(vop3
->opsel
== 0 || program
->chip_class
>= GFX9
, "Opsel is only supported on GFX9+", instr
.get());
194 for (unsigned i
= 0; i
< 3; i
++) {
195 if (i
>= instr
->operands
.size() ||
196 (instr
->operands
[i
].hasRegClass() && instr
->operands
[i
].regClass().is_subdword() && !instr
->operands
[i
].isFixed()))
197 check((vop3
->opsel
& (1 << i
)) == 0, "Unexpected opsel for operand", instr
.get());
199 if (instr
->definitions
[0].regClass().is_subdword() && !instr
->definitions
[0].isFixed())
200 check((vop3
->opsel
& (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr
.get());
203 /* check for undefs */
204 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
205 if (instr
->operands
[i
].isUndefined()) {
206 bool flat
= instr
->format
== Format::FLAT
|| instr
->format
== Format::SCRATCH
|| instr
->format
== Format::GLOBAL
;
207 bool can_be_undef
= is_phi(instr
) || instr
->format
== Format::EXP
||
208 instr
->format
== Format::PSEUDO_REDUCTION
||
209 instr
->opcode
== aco_opcode::p_create_vector
||
210 (flat
&& i
== 1) || (instr
->format
== Format::MIMG
&& i
== 1) ||
211 ((instr
->format
== Format::MUBUF
|| instr
->format
== Format::MTBUF
) && i
== 1);
212 check(can_be_undef
, "Undefs can only be used in certain operands", instr
.get());
214 check(instr
->operands
[i
].isFixed() || instr
->operands
[i
].isTemp() || instr
->operands
[i
].isConstant(), "Uninitialized Operand", instr
.get());
218 /* check subdword definitions */
219 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
220 if (instr
->definitions
[i
].regClass().is_subdword())
221 check(instr
->format
== Format::PSEUDO
|| instr
->definitions
[i
].bytes() <= 4, "Only Pseudo instructions can write subdword registers larger than 4 bytes", instr
.get());
/* check literals */
224 if (instr
->isSALU() || instr
->isVALU()) {
227 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
229 Operand op
= instr
->operands
[i
];
233 check(instr
->format
== Format::SOP1
||
234 instr
->format
== Format::SOP2
||
235 instr
->format
== Format::SOPC
||
236 instr
->format
== Format::VOP1
||
237 instr
->format
== Format::VOP2
||
238 instr
->format
== Format::VOPC
||
239 (instr
->isVOP3() && program
->chip_class
>= GFX10
),
240 "Literal applied on wrong instruction format", instr
.get());
242 check(literal
.isUndefined() || (literal
.size() == op
.size() && literal
.constantValue() == op
.constantValue()), "Only 1 Literal allowed", instr
.get());
244 check(!instr
->isVALU() || instr
->isVOP3() || i
== 0 || i
== 2, "Wrong source position for Literal argument", instr
.get());
247 /* check num sgprs for VALU */
248 if (instr
->isVALU()) {
249 bool is_shift64
= instr
->opcode
== aco_opcode::v_lshlrev_b64
||
250 instr
->opcode
== aco_opcode::v_lshrrev_b64
||
251 instr
->opcode
== aco_opcode::v_ashrrev_i64
;
252 unsigned const_bus_limit
= 1;
253 if (program
->chip_class
>= GFX10
&& !is_shift64
)
256 uint32_t scalar_mask
= instr
->isVOP3() ? 0x7 : 0x5;
258 scalar_mask
= program
->chip_class
>= GFX9
? 0x7 : 0x4;
260 if ((int) instr
->format
& (int) Format::VOPC
||
261 instr
->opcode
== aco_opcode::v_readfirstlane_b32
||
262 instr
->opcode
== aco_opcode::v_readlane_b32
||
263 instr
->opcode
== aco_opcode::v_readlane_b32_e64
) {
264 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
,
265 "Wrong Definition type for VALU instruction", instr
.get());
267 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
,
268 "Wrong Definition type for VALU instruction", instr
.get());
271 unsigned num_sgprs
= 0;
272 unsigned sgpr
[] = {0, 0};
273 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
275 Operand op
= instr
->operands
[i
];
276 if (instr
->opcode
== aco_opcode::v_readfirstlane_b32
||
277 instr
->opcode
== aco_opcode::v_readlane_b32
||
278 instr
->opcode
== aco_opcode::v_readlane_b32_e64
) {
280 (op
.isTemp() && op
.regClass().type() == RegType::sgpr
) ||
282 "Must be a SGPR or a constant", instr
.get());
284 (op
.isTemp() && op
.regClass().type() == RegType::vgpr
&& op
.bytes() <= 4),
285 "Wrong Operand type for VALU instruction", instr
.get());
289 if (instr
->opcode
== aco_opcode::v_writelane_b32
||
290 instr
->opcode
== aco_opcode::v_writelane_b32_e64
) {
292 (op
.isTemp() && op
.regClass().type() == RegType::vgpr
&& op
.bytes() <= 4),
293 "Wrong Operand type for VALU instruction", instr
.get());
295 (op
.isTemp() && op
.regClass().type() == RegType::sgpr
) ||
297 "Must be a SGPR or a constant", instr
.get());
300 if (op
.isTemp() && instr
->operands
[i
].regClass().type() == RegType::sgpr
) {
301 check(scalar_mask
& (1 << i
), "Wrong source position for SGPR argument", instr
.get());
303 if (op
.tempId() != sgpr
[0] && op
.tempId() != sgpr
[1]) {
305 sgpr
[num_sgprs
++] = op
.tempId();
309 if (op
.isConstant() && !op
.isLiteral())
310 check(scalar_mask
& (1 << i
), "Wrong source position for constant argument", instr
.get());
312 check(num_sgprs
+ (literal
.isUndefined() ? 0 : 1) <= const_bus_limit
, "Too many SGPRs/literals", instr
.get());
/* check SALU operand and definition types */
315 if (instr
->format
== Format::SOP1
|| instr
->format
== Format::SOP2
) {
316 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "Wrong Definition type for SALU instruction", instr
.get());
317 for (const Operand
& op
: instr
->operands
) {
318 check(op
.isConstant() || op
.regClass().type() <= RegType::sgpr
,
319 "Wrong Operand type for SALU instruction", instr
.get());
/* checks that depend on the instruction format */
324 switch (instr
->format
) {
325 case Format::PSEUDO
: {
326 bool is_subdword
= false;
327 bool has_const_sgpr
= false;
328 bool has_literal
= false;
329 for (Definition def
: instr
->definitions
)
330 is_subdword
|= def
.regClass().is_subdword();
331 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
332 if (instr
->opcode
== aco_opcode::p_extract_vector
&& i
== 1)
334 Operand op
= instr
->operands
[i
];
335 is_subdword
|= op
.hasRegClass() && op
.regClass().is_subdword();
336 has_const_sgpr
|= op
.isConstant() || (op
.hasRegClass() && op
.regClass().type() == RegType::sgpr
);
337 has_literal
|= op
.isLiteral();
340 check(!is_subdword
|| !has_const_sgpr
|| program
->chip_class
>= GFX9
,
341 "Sub-dword pseudo instructions can only take constants or SGPRs on GFX9+", instr
.get());
342 check(!is_subdword
|| !has_literal
, "Sub-dword pseudo instructions cannot take literals", instr
.get());
344 if (instr
->opcode
== aco_opcode::p_create_vector
) {
346 for (const Operand
& op
: instr
->operands
) {
349 check(size
== instr
->definitions
[0].bytes(), "Definition size does not match operand sizes", instr
.get());
350 if (instr
->definitions
[0].getTemp().type() == RegType::sgpr
) {
351 for (const Operand
& op
: instr
->operands
) {
352 check(op
.isConstant() || op
.regClass().type() == RegType::sgpr
,
353 "Wrong Operand type for scalar vector", instr
.get());
356 } else if (instr
->opcode
== aco_opcode::p_extract_vector
) {
357 check((instr
->operands
[0].isTemp()) && instr
->operands
[1].isConstant(), "Wrong Operand types", instr
.get());
358 check((instr
->operands
[1].constantValue() + 1) * instr
->definitions
[0].bytes() <= instr
->operands
[0].bytes(), "Index out of range", instr
.get());
359 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
|| instr
->operands
[0].regClass().type() == RegType::sgpr
,
360 "Cannot extract SGPR value from VGPR vector", instr
.get());
361 } else if (instr
->opcode
== aco_opcode::p_parallelcopy
) {
362 check(instr
->definitions
.size() == instr
->operands
.size(), "Number of Operands does not match number of Definitions", instr
.get());
363 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
364 if (instr
->operands
[i
].isTemp())
365 check((instr
->definitions
[i
].getTemp().type() == instr
->operands
[i
].regClass().type()) ||
366 (instr
->definitions
[i
].getTemp().type() == RegType::vgpr
&& instr
->operands
[i
].regClass().type() == RegType::sgpr
),
367 "Operand and Definition types do not match", instr
.get());
369 } else if (instr
->opcode
== aco_opcode::p_phi
) {
370 check(instr
->operands
.size() == block
.logical_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
371 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "Logical Phi Definition must be vgpr", instr
.get());
372 } else if (instr
->opcode
== aco_opcode::p_linear_phi
) {
373 for (const Operand
& op
: instr
->operands
)
374 check(!op
.isTemp() || op
.getTemp().is_linear(), "Wrong Operand type", instr
.get());
375 check(instr
->operands
.size() == block
.linear_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
380 if (instr
->operands
.size() >= 1)
381 check((instr
->operands
[0].isFixed() && !instr
->operands
[0].isConstant()) ||
382 (instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::sgpr
), "SMEM operands must be sgpr", instr
.get());
383 if (instr
->operands
.size() >= 2)
384 check(instr
->operands
[1].isConstant() || (instr
->operands
[1].isTemp() && instr
->operands
[1].regClass().type() == RegType::sgpr
),
385 "SMEM offset must be constant or sgpr", instr
.get());
386 if (!instr
->definitions
.empty())
387 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "SMEM result must be sgpr", instr
.get());
391 case Format::MUBUF
: {
392 check(instr
->operands
.size() > 1, "VMEM instructions must have at least one operand", instr
.get());
393 check(instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::vgpr
,
394 "VADDR must be in vgpr for VMEM instructions", instr
.get());
395 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::sgpr
, "VMEM resource constant must be sgpr", instr
.get());
396 check(instr
->operands
.size() < 4 || (instr
->operands
[3].isTemp() && instr
->operands
[3].regClass().type() == RegType::vgpr
), "VMEM write data must be vgpr", instr
.get());
400 check(instr
->operands
.size() == 3, "MIMG instructions must have exactly 3 operands", instr
.get());
401 check(instr
->operands
[0].hasRegClass() && (instr
->operands
[0].regClass() == s4
|| instr
->operands
[0].regClass() == s8
),
402 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr
.get());
403 if (instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::sgpr
)
404 check(instr
->operands
[1].regClass() == s4
, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr
.get());
405 else if (instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::vgpr
)
406 check((instr
->definitions
.empty() || instr
->definitions
[0].regClass() == instr
->operands
[1].regClass() ||
407 instr
->opcode
== aco_opcode::image_atomic_cmpswap
|| instr
->opcode
== aco_opcode::image_atomic_fcmpswap
),
408 "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr
.get());
409 check(instr
->operands
[2].hasRegClass() && instr
->operands
[2].regClass().type() == RegType::vgpr
,
410 "MIMG operands[2] (VADDR) must be VGPR", instr
.get());
411 check(instr
->definitions
.empty() || (instr
->definitions
[0].isTemp() && instr
->definitions
[0].regClass().type() == RegType::vgpr
),
412 "MIMG definitions[0] (VDATA) must be VGPR", instr
.get());
416 for (const Operand
& op
: instr
->operands
) {
417 check((op
.isTemp() && op
.regClass().type() == RegType::vgpr
) || op
.physReg() == m0
,
418 "Only VGPRs are valid DS instruction operands", instr
.get());
420 if (!instr
->definitions
.empty())
421 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "DS instruction must return VGPR", instr
.get());
425 for (unsigned i
= 0; i
< 4; i
++)
426 check(instr
->operands
[i
].hasRegClass() && instr
->operands
[i
].regClass().type() == RegType::vgpr
,
427 "Only VGPRs are valid Export arguments", instr
.get());
431 check(instr
->operands
[1].isUndefined(), "Flat instructions don't support SADDR", instr
.get());
434 case Format::SCRATCH
: {
435 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH address must be vgpr", instr
.get());
436 check(instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::sgpr
,
437 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr
.get());
438 if (!instr
->definitions
.empty())
439 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH result must be vgpr", instr
.get());
441 check(instr
->operands
[2].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH data must be vgpr", instr
.get());
451 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++) {
452 Block
& block
= program
->blocks
[i
];
453 check_block(block
.index
== i
, "block.index must match actual index", &block
);
455 /* predecessors/successors should be sorted */
456 for (unsigned j
= 0; j
+ 1 < block
.linear_preds
.size(); j
++)
457 check_block(block
.linear_preds
[j
] < block
.linear_preds
[j
+ 1], "linear predecessors must be sorted", &block
);
458 for (unsigned j
= 0; j
+ 1 < block
.logical_preds
.size(); j
++)
459 check_block(block
.logical_preds
[j
] < block
.logical_preds
[j
+ 1], "logical predecessors must be sorted", &block
);
460 for (unsigned j
= 0; j
+ 1 < block
.linear_succs
.size(); j
++)
461 check_block(block
.linear_succs
[j
] < block
.linear_succs
[j
+ 1], "linear successors must be sorted", &block
);
462 for (unsigned j
= 0; j
+ 1 < block
.logical_succs
.size(); j
++)
463 check_block(block
.logical_succs
[j
] < block
.logical_succs
[j
+ 1], "logical successors must be sorted", &block
);
465 /* critical edges are not allowed */
466 if (block
.linear_preds
.size() > 1) {
467 for (unsigned pred
: block
.linear_preds
)
468 check_block(program
->blocks
[pred
].linear_succs
.size() == 1, "linear critical edges are not allowed", &program
->blocks
[pred
]);
469 for (unsigned pred
: block
.logical_preds
)
470 check_block(program
->blocks
[pred
].logical_succs
.size() == 1, "logical critical edges are not allowed", &program
->blocks
[pred
]);
/* A default-constructed Location refers to no block and no instruction. */
481 Location() : block(NULL
), instr(NULL
) {}
/* Instruction this location refers to. */
484 Instruction
*instr
; // NULL if the location is the block's live-in
493 bool ra_fail(Program
*program
, Location loc
, Location loc2
, const char *fmt
, ...) {
497 vsprintf(msg
, fmt
, args
);
502 FILE *memf
= open_memstream(&out
, &outsize
);
504 fprintf(memf
, "RA error found at instruction in BB%d:\n", loc
.block
->index
);
506 aco_print_instr(loc
.instr
, memf
);
507 fprintf(memf
, "\n%s", msg
);
509 fprintf(memf
, "%s", msg
);
512 fprintf(memf
, " in BB%d:\n", loc2
.block
->index
);
513 aco_print_instr(loc2
.instr
, memf
);
515 fprintf(memf
, "\n\n");
518 aco_err(program
, out
);
/* Decides whether operand `index` of `instr` sits at a byte offset inside
 * its register that the instruction is able to read on `chip`.
 *
 * The conditions below look at, in order: the p_as_uniform opcode, PSEUDO
 * instructions on GFX8+, an SDWA source selector that matches the operand
 * size, opsel availability when the operand starts at byte 2, and a list of
 * opcodes (v_cvt_f32_ubyte*, *_d16_hi stores) that accept a specific
 * byte/operand-index combination.
 * NOTE(review): confirm the result returned for each of these conditions. */
524 bool validate_subdword_operand(chip_class chip
, const aco_ptr
<Instruction
>& instr
, unsigned index
)
526 Operand op
= instr
->operands
[index
];
527 unsigned byte
= op
.physReg().byte();
529 if (instr
->opcode
== aco_opcode::p_as_uniform
)
531 if (instr
->format
== Format::PSEUDO
&& chip
>= GFX8
)
533 if (instr
->isSDWA() && (static_cast<SDWA_instruction
*>(instr
.get())->sel
[index
] & sdwa_asuint
) == (sdwa_isra
| op
.bytes()))
535 if (byte
== 2 && can_use_opsel(chip
, instr
->opcode
, index
, 1))
538 switch (instr
->opcode
) {
539 case aco_opcode::v_cvt_f32_ubyte1
:
543 case aco_opcode::v_cvt_f32_ubyte2
:
547 case aco_opcode::v_cvt_f32_ubyte3
:
551 case aco_opcode::ds_write_b8_d16_hi
:
552 case aco_opcode::ds_write_b16_d16_hi
:
553 if (byte
== 2 && index
== 1)
556 case aco_opcode::buffer_store_byte_d16_hi
:
557 case aco_opcode::buffer_store_short_d16_hi
:
558 if (byte
== 2 && index
== 3)
561 case aco_opcode::flat_store_byte_d16_hi
:
562 case aco_opcode::flat_store_short_d16_hi
:
563 case aco_opcode::scratch_store_byte_d16_hi
:
564 case aco_opcode::scratch_store_short_d16_hi
:
565 case aco_opcode::global_store_byte_d16_hi
:
566 case aco_opcode::global_store_short_d16_hi
:
567 if (byte
== 2 && index
== 2)
/* Decides whether the first definition of `instr` sits at a byte offset
 * inside its register that the instruction is able to write on `chip`.
 *
 * The conditions below look at, in order: PSEUDO instructions on GFX8+, an
 * SDWA destination selector that matches the definition size, opsel
 * availability when the definition starts at byte 2, and a list of
 * *_d16_hi load opcodes.
 * NOTE(review): confirm the result returned for each of these conditions. */
576 bool validate_subdword_definition(chip_class chip
, const aco_ptr
<Instruction
>& instr
)
578 Definition def
= instr
->definitions
[0];
579 unsigned byte
= def
.physReg().byte();
581 if (instr
->format
== Format::PSEUDO
&& chip
>= GFX8
)
583 if (instr
->isSDWA() && static_cast<SDWA_instruction
*>(instr
.get())->dst_sel
== (sdwa_isra
| def
.bytes()))
585 if (byte
== 2 && can_use_opsel(chip
, instr
->opcode
, -1, 1))
588 switch (instr
->opcode
) {
589 case aco_opcode::buffer_load_ubyte_d16_hi
:
590 case aco_opcode::buffer_load_short_d16_hi
:
591 case aco_opcode::flat_load_ubyte_d16_hi
:
592 case aco_opcode::flat_load_short_d16_hi
:
593 case aco_opcode::scratch_load_ubyte_d16_hi
:
594 case aco_opcode::scratch_load_short_d16_hi
:
595 case aco_opcode::global_load_ubyte_d16_hi
:
596 case aco_opcode::global_load_short_d16_hi
:
597 case aco_opcode::ds_read_u8_d16_hi
:
598 case aco_opcode::ds_read_u16_d16_hi
:
/* Returns the number of bytes that writing definition `index` of `instr`
 * affects in its destination register.
 *
 * PSEUDO instructions: the definition's byte size on GFX8+, otherwise its
 * size in dwords times four.
 * d16 / d16_hi loads: 4 when program->sram_ecc_enabled is set, otherwise 2.
 * Fallback: the larger of (the definition's byte size on GFX10+, else 4)
 * and the opcode's definition_size table entry divided by 8.
 * NOTE(review): confirm the values returned for the SDWA case and for the
 * v_mad_f16 .. v_interp_p2_f16 opcode group. */
607 unsigned get_subdword_bytes_written(Program
*program
, const aco_ptr
<Instruction
>& instr
, unsigned index
)
609 chip_class chip
= program
->chip_class
;
610 Definition def
= instr
->definitions
[index
];
612 if (instr
->format
== Format::PSEUDO
)
613 return chip
>= GFX8
? def
.bytes() : def
.size() * 4u;
614 if (instr
->isSDWA() && static_cast<SDWA_instruction
*>(instr
.get())->dst_sel
== (sdwa_isra
| def
.bytes()))
617 switch (instr
->opcode
) {
618 case aco_opcode::buffer_load_ubyte_d16
:
619 case aco_opcode::buffer_load_short_d16
:
620 case aco_opcode::flat_load_ubyte_d16
:
621 case aco_opcode::flat_load_short_d16
:
622 case aco_opcode::scratch_load_ubyte_d16
:
623 case aco_opcode::scratch_load_short_d16
:
624 case aco_opcode::global_load_ubyte_d16
:
625 case aco_opcode::global_load_short_d16
:
626 case aco_opcode::ds_read_u8_d16
:
627 case aco_opcode::ds_read_u16_d16
:
628 case aco_opcode::buffer_load_ubyte_d16_hi
:
629 case aco_opcode::buffer_load_short_d16_hi
:
630 case aco_opcode::flat_load_ubyte_d16_hi
:
631 case aco_opcode::flat_load_short_d16_hi
:
632 case aco_opcode::scratch_load_ubyte_d16_hi
:
633 case aco_opcode::scratch_load_short_d16_hi
:
634 case aco_opcode::global_load_ubyte_d16_hi
:
635 case aco_opcode::global_load_short_d16_hi
:
636 case aco_opcode::ds_read_u8_d16_hi
:
637 case aco_opcode::ds_read_u16_d16_hi
:
638 return program
->sram_ecc_enabled
? 4 : 2;
639 case aco_opcode::v_mad_f16
:
640 case aco_opcode::v_mad_u16
:
641 case aco_opcode::v_mad_i16
:
642 case aco_opcode::v_fma_f16
:
643 case aco_opcode::v_div_fixup_f16
:
644 case aco_opcode::v_interp_p2_f16
:
651 return MAX2(chip
>= GFX10
? def
.bytes() : 4, instr_info
.definition_size
[(int)instr
->opcode
] / 8u);
654 } /* end namespace */
656 bool validate_ra(Program
*program
, const struct radv_nir_compiler_options
*options
) {
657 if (!(debug_flags
& DEBUG_VALIDATE_RA
))
661 aco::live live_vars
= aco::live_var_analysis(program
, options
);
662 std::vector
<std::vector
<Temp
>> phi_sgpr_ops(program
->blocks
.size());
664 std::map
<unsigned, Assignment
> assignments
;
665 for (Block
& block
: program
->blocks
) {
668 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
669 if (instr
->opcode
== aco_opcode::p_phi
) {
670 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
671 if (instr
->operands
[i
].isTemp() &&
672 instr
->operands
[i
].getTemp().type() == RegType::sgpr
&&
673 instr
->operands
[i
].isFirstKill())
674 phi_sgpr_ops
[block
.logical_preds
[i
]].emplace_back(instr
->operands
[i
].getTemp());
678 loc
.instr
= instr
.get();
679 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
680 Operand
& op
= instr
->operands
[i
];
684 err
|= ra_fail(program
, loc
, Location(), "Operand %d is not assigned a register", i
);
685 if (assignments
.count(op
.tempId()) && assignments
[op
.tempId()].reg
!= op
.physReg())
686 err
|= ra_fail(program
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an inconsistent register assignment with instruction", i
);
687 if ((op
.getTemp().type() == RegType::vgpr
&& op
.physReg().reg_b
+ op
.bytes() > (256 + program
->config
->num_vgprs
) * 4) ||
688 (op
.getTemp().type() == RegType::sgpr
&& op
.physReg() + op
.size() > program
->config
->num_sgprs
&& op
.physReg() < program
->sgpr_limit
))
689 err
|= ra_fail(program
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an out-of-bounds register assignment", i
);
690 if (op
.physReg() == vcc
&& !program
->needs_vcc
)
691 err
|= ra_fail(program
, loc
, Location(), "Operand %d fixed to vcc but needs_vcc=false", i
);
692 if (op
.regClass().is_subdword() && !validate_subdword_operand(program
->chip_class
, instr
, i
))
693 err
|= ra_fail(program
, loc
, Location(), "Operand %d not aligned correctly", i
);
694 if (!assignments
[op
.tempId()].firstloc
.block
)
695 assignments
[op
.tempId()].firstloc
= loc
;
696 if (!assignments
[op
.tempId()].defloc
.block
)
697 assignments
[op
.tempId()].reg
= op
.physReg();
700 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
701 Definition
& def
= instr
->definitions
[i
];
705 err
|= ra_fail(program
, loc
, Location(), "Definition %d is not assigned a register", i
);
706 if (assignments
[def
.tempId()].defloc
.block
)
707 err
|= ra_fail(program
, loc
, assignments
.at(def
.tempId()).defloc
, "Temporary %%%d also defined by instruction", def
.tempId());
708 if ((def
.getTemp().type() == RegType::vgpr
&& def
.physReg().reg_b
+ def
.bytes() > (256 + program
->config
->num_vgprs
) * 4) ||
709 (def
.getTemp().type() == RegType::sgpr
&& def
.physReg() + def
.size() > program
->config
->num_sgprs
&& def
.physReg() < program
->sgpr_limit
))
710 err
|= ra_fail(program
, loc
, assignments
.at(def
.tempId()).firstloc
, "Definition %d has an out-of-bounds register assignment", i
);
711 if (def
.physReg() == vcc
&& !program
->needs_vcc
)
712 err
|= ra_fail(program
, loc
, Location(), "Definition %d fixed to vcc but needs_vcc=false", i
);
713 if (def
.regClass().is_subdword() && !validate_subdword_definition(program
->chip_class
, instr
))
714 err
|= ra_fail(program
, loc
, Location(), "Definition %d not aligned correctly", i
);
715 if (!assignments
[def
.tempId()].firstloc
.block
)
716 assignments
[def
.tempId()].firstloc
= loc
;
717 assignments
[def
.tempId()].defloc
= loc
;
718 assignments
[def
.tempId()].reg
= def
.physReg();
723 for (Block
& block
: program
->blocks
) {
727 std::array
<unsigned, 2048> regs
; /* register file in bytes */
731 live
.insert(live_vars
.live_out
[block
.index
].begin(), live_vars
.live_out
[block
.index
].end());
732 /* remove killed p_phi sgpr operands */
733 for (Temp tmp
: phi_sgpr_ops
[block
.index
])
737 for (Temp tmp
: live
) {
738 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
739 for (unsigned i
= 0; i
< tmp
.bytes(); i
++) {
740 if (regs
[reg
.reg_b
+ i
]) {
741 err
|= ra_fail(program
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
.reg_b
+ i
]);
743 regs
[reg
.reg_b
+ i
] = tmp
.id();
748 for (auto it
= block
.instructions
.rbegin(); it
!= block
.instructions
.rend(); ++it
) {
749 aco_ptr
<Instruction
>& instr
= *it
;
751 /* check killed p_phi sgpr operands */
752 if (instr
->opcode
== aco_opcode::p_logical_end
) {
753 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
754 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
755 for (unsigned i
= 0; i
< tmp
.bytes(); i
++) {
756 if (regs
[reg
.reg_b
+ i
])
757 err
|= ra_fail(program
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
.reg_b
+ i
]);
763 for (const Definition
& def
: instr
->definitions
) {
766 live
.erase(def
.getTemp());
769 /* don't count phi operands as live-in, since they are actually
770 * killed when they are copied at the predecessor */
771 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
772 for (const Operand
& op
: instr
->operands
) {
775 live
.insert(op
.getTemp());
780 for (Temp tmp
: live
) {
781 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
782 for (unsigned i
= 0; i
< tmp
.bytes(); i
++)
783 regs
[reg
.reg_b
+ i
] = tmp
.id();
786 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
787 loc
.instr
= instr
.get();
789 /* remove killed p_phi operands from regs */
790 if (instr
->opcode
== aco_opcode::p_logical_end
) {
791 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
792 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
793 for (unsigned i
= 0; i
< tmp
.bytes(); i
++)
794 regs
[reg
.reg_b
+ i
] = 0;
798 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
799 for (const Operand
& op
: instr
->operands
) {
802 if (op
.isFirstKillBeforeDef()) {
803 for (unsigned j
= 0; j
< op
.getTemp().bytes(); j
++)
804 regs
[op
.physReg().reg_b
+ j
] = 0;
809 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
810 Definition
& def
= instr
->definitions
[i
];
813 Temp tmp
= def
.getTemp();
814 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
815 for (unsigned j
= 0; j
< tmp
.bytes(); j
++) {
816 if (regs
[reg
.reg_b
+ j
])
817 err
|= ra_fail(program
, loc
, assignments
.at(regs
[reg
.reg_b
+ j
]).defloc
, "Assignment of element %d of %%%d already taken by %%%d from instruction", i
, tmp
.id(), regs
[reg
.reg_b
+ j
]);
818 regs
[reg
.reg_b
+ j
] = tmp
.id();
820 if (def
.regClass().is_subdword() && def
.bytes() < 4) {
821 unsigned written
= get_subdword_bytes_written(program
, instr
, i
);
822 /* If written=4, the instruction still might write the upper half. In that case, it's the lower half that isn't preserved */
823 for (unsigned j
= reg
.byte() & ~(written
- 1); j
< written
; j
++) {
824 unsigned written_reg
= reg
.reg() * 4u + j
;
825 if (regs
[written_reg
] && regs
[written_reg
] != def
.tempId())
826 err
|= ra_fail(program
, loc
, assignments
.at(regs
[written_reg
]).defloc
, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i
, tmp
.id(), regs
[written_reg
]);
831 for (const Definition
& def
: instr
->definitions
) {
835 for (unsigned j
= 0; j
< def
.getTemp().bytes(); j
++)
836 regs
[def
.physReg().reg_b
+ j
] = 0;
840 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
841 for (const Operand
& op
: instr
->operands
) {
844 if (op
.isLateKill() && op
.isFirstKill()) {
845 for (unsigned j
= 0; j
< op
.getTemp().bytes(); j
++)
846 regs
[op
.physReg().reg_b
+ j
] = 0;