/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
33 void perfwarn(bool cond
, const char *msg
, Instruction
*instr
)
36 fprintf(stderr
, "ACO performance warning: %s\n", msg
);
38 fprintf(stderr
, "instruction: ");
39 aco_print_instr(instr
, stderr
);
40 fprintf(stderr
, "\n");
43 if (debug_flags
& DEBUG_PERFWARN
)
49 void validate(Program
* program
, FILE * output
)
51 if (!(debug_flags
& DEBUG_VALIDATE
))
55 auto check
= [&output
, &is_valid
](bool check
, const char * msg
, aco::Instruction
* instr
) -> void {
57 fprintf(output
, "%s: ", msg
);
58 aco_print_instr(instr
, output
);
59 fprintf(output
, "\n");
64 for (Block
& block
: program
->blocks
) {
65 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
67 /* check base format */
68 Format base_format
= instr
->format
;
69 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::SDWA
);
70 base_format
= (Format
)((uint32_t)base_format
& ~(uint32_t)Format::DPP
);
71 if ((uint32_t)base_format
& (uint32_t)Format::VOP1
)
72 base_format
= Format::VOP1
;
73 else if ((uint32_t)base_format
& (uint32_t)Format::VOP2
)
74 base_format
= Format::VOP2
;
75 else if ((uint32_t)base_format
& (uint32_t)Format::VOPC
)
76 base_format
= Format::VOPC
;
77 else if ((uint32_t)base_format
& (uint32_t)Format::VINTRP
)
78 base_format
= Format::VINTRP
;
79 check(base_format
== instr_info
.format
[(int)instr
->opcode
], "Wrong base format for instruction", instr
.get());
81 /* check VOP3 modifiers */
82 if (((uint32_t)instr
->format
& (uint32_t)Format::VOP3
) && instr
->format
!= Format::VOP3
) {
83 check(base_format
== Format::VOP2
||
84 base_format
== Format::VOP1
||
85 base_format
== Format::VOPC
||
86 base_format
== Format::VINTRP
,
87 "Format cannot have VOP3A/VOP3B applied", instr
.get());
90 /* check for undefs */
91 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
92 if (instr
->operands
[i
].isUndefined()) {
93 bool flat
= instr
->format
== Format::FLAT
|| instr
->format
== Format::SCRATCH
|| instr
->format
== Format::GLOBAL
;
94 bool can_be_undef
= is_phi(instr
) || instr
->format
== Format::EXP
||
95 instr
->format
== Format::PSEUDO_REDUCTION
||
96 (flat
&& i
== 1) || (instr
->format
== Format::MIMG
&& i
== 2) ||
97 ((instr
->format
== Format::MUBUF
|| instr
->format
== Format::MTBUF
) && i
== 0);
98 check(can_be_undef
, "Undefs can only be used in certain operands", instr
.get());
102 /* check num literals */
103 if (instr
->isSALU() || instr
->isVALU()) {
104 unsigned num_literals
= 0;
105 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
107 if (instr
->operands
[i
].isLiteral() && instr
->isVOP3() && program
->chip_class
>= GFX10
) {
109 } else if (instr
->operands
[i
].isLiteral()) {
110 check(instr
->format
== Format::SOP1
||
111 instr
->format
== Format::SOP2
||
112 instr
->format
== Format::SOPC
||
113 instr
->format
== Format::VOP1
||
114 instr
->format
== Format::VOP2
||
115 instr
->format
== Format::VOPC
,
116 "Literal applied on wrong instruction format", instr
.get());
119 check(!instr
->isVALU() || i
== 0 || i
== 2, "Wrong source position for Literal argument", instr
.get());
122 check(num_literals
<= 1, "Only 1 Literal allowed", instr
.get());
124 /* check num sgprs for VALU */
125 if (instr
->isVALU()) {
126 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
||
127 (int) instr
->format
& (int) Format::VOPC
||
128 instr
->opcode
== aco_opcode::v_readfirstlane_b32
||
129 instr
->opcode
== aco_opcode::v_readlane_b32
,
130 "Wrong Definition type for VALU instruction", instr
.get());
131 unsigned num_sgpr
= 0;
132 unsigned sgpr_idx
= instr
->operands
.size();
133 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++)
135 if (instr
->operands
[i
].isTemp() && instr
->operands
[i
].regClass().type() == RegType::sgpr
) {
136 check(i
!= 1 || (int) instr
->format
& (int) Format::VOP3A
, "Wrong source position for SGPR argument", instr
.get());
138 if (sgpr_idx
== instr
->operands
.size() || instr
->operands
[sgpr_idx
].tempId() != instr
->operands
[i
].tempId())
143 if (instr
->operands
[i
].isConstant() && !instr
->operands
[i
].isLiteral())
144 check(i
== 0 || (int) instr
->format
& (int) Format::VOP3A
, "Wrong source position for constant argument", instr
.get());
146 check(num_sgpr
+ num_literals
<= 1, "Only 1 Literal OR 1 SGPR allowed", instr
.get());
149 if (instr
->format
== Format::SOP1
|| instr
->format
== Format::SOP2
) {
150 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "Wrong Definition type for SALU instruction", instr
.get());
151 for (const Operand
& op
: instr
->operands
) {
152 check(op
.isConstant() || op
.regClass().type() <= RegType::sgpr
,
153 "Wrong Operand type for SALU instruction", instr
.get());
158 switch (instr
->format
) {
159 case Format::PSEUDO
: {
160 if (instr
->opcode
== aco_opcode::p_create_vector
) {
162 for (const Operand
& op
: instr
->operands
) {
165 check(size
== instr
->definitions
[0].size(), "Definition size does not match operand sizes", instr
.get());
166 if (instr
->definitions
[0].getTemp().type() == RegType::sgpr
) {
167 for (const Operand
& op
: instr
->operands
) {
168 check(op
.isConstant() || op
.regClass().type() == RegType::sgpr
,
169 "Wrong Operand type for scalar vector", instr
.get());
172 } else if (instr
->opcode
== aco_opcode::p_extract_vector
) {
173 check((instr
->operands
[0].isTemp()) && instr
->operands
[1].isConstant(), "Wrong Operand types", instr
.get());
174 check(instr
->operands
[1].constantValue() < instr
->operands
[0].size(), "Index out of range", instr
.get());
175 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
|| instr
->operands
[0].regClass().type() == RegType::sgpr
,
176 "Cannot extract SGPR value from VGPR vector", instr
.get());
177 } else if (instr
->opcode
== aco_opcode::p_parallelcopy
) {
178 check(instr
->definitions
.size() == instr
->operands
.size(), "Number of Operands does not match number of Definitions", instr
.get());
179 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
180 if (instr
->operands
[i
].isTemp())
181 check((instr
->definitions
[i
].getTemp().type() == instr
->operands
[i
].regClass().type()) ||
182 (instr
->definitions
[i
].getTemp().type() == RegType::vgpr
&& instr
->operands
[i
].regClass().type() == RegType::sgpr
),
183 "Operand and Definition types do not match", instr
.get());
185 } else if (instr
->opcode
== aco_opcode::p_phi
) {
186 check(instr
->operands
.size() == block
.logical_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
187 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
|| instr
->definitions
[0].getTemp().regClass() == s2
, "Logical Phi Definition must be vgpr or divergent boolean", instr
.get());
188 } else if (instr
->opcode
== aco_opcode::p_linear_phi
) {
189 for (const Operand
& op
: instr
->operands
)
190 check(!op
.isTemp() || op
.getTemp().is_linear(), "Wrong Operand type", instr
.get());
191 check(instr
->operands
.size() == block
.linear_preds
.size(), "Number of Operands does not match number of predecessors", instr
.get());
196 if (instr
->operands
.size() >= 1)
197 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::sgpr
, "SMEM operands must be sgpr", instr
.get());
198 if (instr
->operands
.size() >= 2)
199 check(instr
->operands
[1].isConstant() || (instr
->operands
[1].isTemp() && instr
->operands
[1].regClass().type() == RegType::sgpr
),
200 "SMEM offset must be constant or sgpr", instr
.get());
201 if (!instr
->definitions
.empty())
202 check(instr
->definitions
[0].getTemp().type() == RegType::sgpr
, "SMEM result must be sgpr", instr
.get());
208 check(instr
->operands
.size() > 1, "VMEM instructions must have at least one operand", instr
.get());
209 check(instr
->operands
[0].hasRegClass() && instr
->operands
[0].regClass().type() == RegType::vgpr
,
210 "VADDR must be in vgpr for VMEM instructions", instr
.get());
211 check(instr
->operands
[1].isTemp() && instr
->operands
[1].regClass().type() == RegType::sgpr
, "VMEM resource constant must be sgpr", instr
.get());
212 check(instr
->operands
.size() < 4 || (instr
->operands
[3].isTemp() && instr
->operands
[3].regClass().type() == RegType::vgpr
), "VMEM write data must be vgpr", instr
.get());
216 for (const Operand
& op
: instr
->operands
) {
217 check((op
.isTemp() && op
.regClass().type() == RegType::vgpr
) || op
.physReg() == m0
,
218 "Only VGPRs are valid DS instruction operands", instr
.get());
220 if (!instr
->definitions
.empty())
221 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "DS instruction must return VGPR", instr
.get());
225 for (unsigned i
= 0; i
< 4; i
++)
226 check(instr
->operands
[i
].hasRegClass() && instr
->operands
[i
].regClass().type() == RegType::vgpr
,
227 "Only VGPRs are valid Export arguments", instr
.get());
231 check(instr
->operands
[1].isUndefined(), "Flat instructions don't support SADDR", instr
.get());
234 case Format::SCRATCH
: {
235 check(instr
->operands
[0].isTemp() && instr
->operands
[0].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH address must be vgpr", instr
.get());
236 check(instr
->operands
[1].hasRegClass() && instr
->operands
[1].regClass().type() == RegType::sgpr
,
237 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr
.get());
238 if (!instr
->definitions
.empty())
239 check(instr
->definitions
[0].getTemp().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH result must be vgpr", instr
.get());
241 check(instr
->operands
[2].regClass().type() == RegType::vgpr
, "FLAT/GLOBAL/SCRATCH data must be vgpr", instr
.get());
256 Location() : block(NULL
), instr(NULL
) {}
259 Instruction
*instr
; //NULL if it's the block's live-in
268 bool ra_fail(FILE *output
, Location loc
, Location loc2
, const char *fmt
, ...) {
272 vsprintf(msg
, fmt
, args
);
275 fprintf(stderr
, "RA error found at instruction in BB%d:\n", loc
.block
->index
);
277 aco_print_instr(loc
.instr
, stderr
);
278 fprintf(stderr
, "\n%s", msg
);
280 fprintf(stderr
, "%s", msg
);
283 fprintf(stderr
, " in BB%d:\n", loc2
.block
->index
);
284 aco_print_instr(loc2
.instr
, stderr
);
286 fprintf(stderr
, "\n\n");
291 } /* end namespace */
293 bool validate_ra(Program
*program
, const struct radv_nir_compiler_options
*options
, FILE *output
) {
294 if (!(debug_flags
& DEBUG_VALIDATE_RA
))
298 aco::live live_vars
= aco::live_var_analysis(program
, options
);
299 std::vector
<std::vector
<Temp
>> phi_sgpr_ops(program
->blocks
.size());
301 std::map
<unsigned, Assignment
> assignments
;
302 for (Block
& block
: program
->blocks
) {
305 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
306 if (instr
->opcode
== aco_opcode::p_phi
) {
307 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
308 if (instr
->operands
[i
].isTemp() &&
309 instr
->operands
[i
].getTemp().type() == RegType::sgpr
&&
310 instr
->operands
[i
].isFirstKill())
311 phi_sgpr_ops
[block
.logical_preds
[i
]].emplace_back(instr
->operands
[i
].getTemp());
315 loc
.instr
= instr
.get();
316 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
317 Operand
& op
= instr
->operands
[i
];
321 err
|= ra_fail(output
, loc
, Location(), "Operand %d is not assigned a register", i
);
322 if (assignments
.count(op
.tempId()) && assignments
[op
.tempId()].reg
!= op
.physReg())
323 err
|= ra_fail(output
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an inconsistent register assignment with instruction", i
);
324 if ((op
.getTemp().type() == RegType::vgpr
&& op
.physReg() + op
.size() > 256 + program
->config
->num_vgprs
) ||
325 (op
.getTemp().type() == RegType::sgpr
&& op
.physReg() + op
.size() > program
->config
->num_sgprs
&& op
.physReg() < program
->sgpr_limit
))
326 err
|= ra_fail(output
, loc
, assignments
.at(op
.tempId()).firstloc
, "Operand %d has an out-of-bounds register assignment", i
);
327 if (!assignments
[op
.tempId()].firstloc
.block
)
328 assignments
[op
.tempId()].firstloc
= loc
;
329 if (!assignments
[op
.tempId()].defloc
.block
)
330 assignments
[op
.tempId()].reg
= op
.physReg();
333 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
334 Definition
& def
= instr
->definitions
[i
];
338 err
|= ra_fail(output
, loc
, Location(), "Definition %d is not assigned a register", i
);
339 if (assignments
[def
.tempId()].defloc
.block
)
340 err
|= ra_fail(output
, loc
, assignments
.at(def
.tempId()).defloc
, "Temporary %%%d also defined by instruction", def
.tempId());
341 if ((def
.getTemp().type() == RegType::vgpr
&& def
.physReg() + def
.size() > 256 + program
->config
->num_vgprs
) ||
342 (def
.getTemp().type() == RegType::sgpr
&& def
.physReg() + def
.size() > program
->config
->num_sgprs
&& def
.physReg() < program
->sgpr_limit
))
343 err
|= ra_fail(output
, loc
, assignments
.at(def
.tempId()).firstloc
, "Definition %d has an out-of-bounds register assignment", i
);
344 if (!assignments
[def
.tempId()].firstloc
.block
)
345 assignments
[def
.tempId()].firstloc
= loc
;
346 assignments
[def
.tempId()].defloc
= loc
;
347 assignments
[def
.tempId()].reg
= def
.physReg();
352 for (Block
& block
: program
->blocks
) {
356 std::array
<unsigned, 512> regs
;
360 live
.insert(live_vars
.live_out
[block
.index
].begin(), live_vars
.live_out
[block
.index
].end());
361 /* remove killed p_phi sgpr operands */
362 for (Temp tmp
: phi_sgpr_ops
[block
.index
])
366 for (Temp tmp
: live
) {
367 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
368 for (unsigned i
= 0; i
< tmp
.size(); i
++) {
370 err
|= ra_fail(output
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
+ i
]);
372 regs
[reg
+ i
] = tmp
.id();
377 for (auto it
= block
.instructions
.rbegin(); it
!= block
.instructions
.rend(); ++it
) {
378 aco_ptr
<Instruction
>& instr
= *it
;
380 /* check killed p_phi sgpr operands */
381 if (instr
->opcode
== aco_opcode::p_logical_end
) {
382 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
383 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
384 for (unsigned i
= 0; i
< tmp
.size(); i
++) {
386 err
|= ra_fail(output
, loc
, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i
, tmp
.id(), regs
[reg
+ i
]);
392 for (const Definition
& def
: instr
->definitions
) {
395 live
.erase(def
.getTemp());
398 /* don't count phi operands as live-in, since they are actually
399 * killed when they are copied at the predecessor */
400 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
401 for (const Operand
& op
: instr
->operands
) {
404 live
.insert(op
.getTemp());
409 for (Temp tmp
: live
) {
410 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
411 for (unsigned i
= 0; i
< tmp
.size(); i
++)
412 regs
[reg
+ i
] = tmp
.id();
415 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
416 loc
.instr
= instr
.get();
418 /* remove killed p_phi operands from regs */
419 if (instr
->opcode
== aco_opcode::p_logical_end
) {
420 for (Temp tmp
: phi_sgpr_ops
[block
.index
]) {
421 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
426 if (instr
->opcode
!= aco_opcode::p_phi
&& instr
->opcode
!= aco_opcode::p_linear_phi
) {
427 for (const Operand
& op
: instr
->operands
) {
430 if (op
.isFirstKill()) {
431 for (unsigned j
= 0; j
< op
.getTemp().size(); j
++)
432 regs
[op
.physReg() + j
] = 0;
437 for (unsigned i
= 0; i
< instr
->definitions
.size(); i
++) {
438 Definition
& def
= instr
->definitions
[i
];
441 Temp tmp
= def
.getTemp();
442 PhysReg reg
= assignments
.at(tmp
.id()).reg
;
443 for (unsigned j
= 0; j
< tmp
.size(); j
++) {
445 err
|= ra_fail(output
, loc
, assignments
.at(regs
[reg
+ i
]).defloc
, "Assignment of element %d of %%%d already taken by %%%d from instruction", i
, tmp
.id(), regs
[reg
+ j
]);
446 regs
[reg
+ j
] = tmp
.id();
450 for (const Definition
& def
: instr
->definitions
) {
454 for (unsigned j
= 0; j
< def
.getTemp().size(); j
++)
455 regs
[def
.physReg() + j
] = 0;