aco: validate v_interp_*_f16 as VOP3 instructions instead of VINTRP
[mesa.git] / src / amd / compiler / aco_validate.cpp
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "aco_ir.h"
26
27 #include <array>
28 #include <map>
29
30 namespace aco {
31
32 #ifndef NDEBUG
33 void perfwarn(bool cond, const char *msg, Instruction *instr)
34 {
35 if (cond) {
36 fprintf(stderr, "ACO performance warning: %s\n", msg);
37 if (instr) {
38 fprintf(stderr, "instruction: ");
39 aco_print_instr(instr, stderr);
40 fprintf(stderr, "\n");
41 }
42
43 if (debug_flags & DEBUG_PERFWARN)
44 exit(1);
45 }
46 }
47 #endif
48
49 bool instr_can_access_subdword(aco_ptr<Instruction>& instr)
50 {
51 return instr->isSDWA() || instr->format == Format::PSEUDO;
52 }
53
54 void validate(Program* program, FILE * output)
55 {
56 if (!(debug_flags & DEBUG_VALIDATE))
57 return;
58
59 bool is_valid = true;
60 auto check = [&output, &is_valid](bool check, const char * msg, aco::Instruction * instr) -> void {
61 if (!check) {
62 fprintf(output, "%s: ", msg);
63 aco_print_instr(instr, output);
64 fprintf(output, "\n");
65 is_valid = false;
66 }
67 };
68 auto check_block = [&output, &is_valid](bool check, const char * msg, aco::Block * block) -> void {
69 if (!check) {
70 fprintf(output, "%s: BB%u\n", msg, block->index);
71 is_valid = false;
72 }
73 };
74
75 for (Block& block : program->blocks) {
76 for (aco_ptr<Instruction>& instr : block.instructions) {
77
78 /* check base format */
79 Format base_format = instr->format;
80 base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
81 base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP);
82 if ((uint32_t)base_format & (uint32_t)Format::VOP1)
83 base_format = Format::VOP1;
84 else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
85 base_format = Format::VOP2;
86 else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
87 base_format = Format::VOPC;
88 else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
89 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
90 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
91 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
92 instr->opcode == aco_opcode::v_interp_p2_f16) {
93 /* v_interp_*_fp16 are considered VINTRP by the compiler but
94 * they are emitted as VOP3.
95 */
96 base_format = Format::VOP3;
97 } else {
98 base_format = Format::VINTRP;
99 }
100 }
101 check(base_format == instr_info.format[(int)instr->opcode], "Wrong base format for instruction", instr.get());
102
103 /* check VOP3 modifiers */
104 if (((uint32_t)instr->format & (uint32_t)Format::VOP3) && instr->format != Format::VOP3) {
105 check(base_format == Format::VOP2 ||
106 base_format == Format::VOP1 ||
107 base_format == Format::VOPC ||
108 base_format == Format::VINTRP,
109 "Format cannot have VOP3A/VOP3B applied", instr.get());
110 }
111
112 /* check SDWA */
113 if (instr->isSDWA()) {
114 check(base_format == Format::VOP2 ||
115 base_format == Format::VOP1 ||
116 base_format == Format::VOPC,
117 "Format cannot have SDWA applied", instr.get());
118
119 check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
120
121 SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get());
122 check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
123 if (base_format == Format::VOPC) {
124 check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
125 check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
126 program->chip_class >= GFX9,
127 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
128 }
129
130 if (instr->operands.size() >= 3) {
131 check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
132 "3rd operand must be fixed to vcc with SDWA", instr.get());
133 }
134 if (instr->definitions.size() >= 2) {
135 check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
136 "2nd definition must be fixed to vcc with SDWA", instr.get());
137 }
138
139 check(instr->opcode != aco_opcode::v_madmk_f32 &&
140 instr->opcode != aco_opcode::v_madak_f32 &&
141 instr->opcode != aco_opcode::v_madmk_f16 &&
142 instr->opcode != aco_opcode::v_madak_f16 &&
143 instr->opcode != aco_opcode::v_readfirstlane_b32 &&
144 instr->opcode != aco_opcode::v_clrexcp &&
145 instr->opcode != aco_opcode::v_swap_b32,
146 "SDWA can't be used with this opcode", instr.get());
147 if (program->chip_class != GFX8) {
148 check(instr->opcode != aco_opcode::v_mac_f32 &&
149 instr->opcode != aco_opcode::v_mac_f16 &&
150 instr->opcode != aco_opcode::v_fmac_f32 &&
151 instr->opcode != aco_opcode::v_fmac_f16,
152 "SDWA can't be used with this opcode", instr.get());
153 }
154 }
155
156 /* check opsel */
157 if (instr->isVOP3()) {
158 VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
159 check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
160 check((vop3->opsel & ~(0x10 | ((1 << instr->operands.size()) - 1))) == 0, "Unused bits in opsel must be zeroed out", instr.get());
161 }
162
163 /* check for undefs */
164 for (unsigned i = 0; i < instr->operands.size(); i++) {
165 if (instr->operands[i].isUndefined()) {
166 bool flat = instr->format == Format::FLAT || instr->format == Format::SCRATCH || instr->format == Format::GLOBAL;
167 bool can_be_undef = is_phi(instr) || instr->format == Format::EXP ||
168 instr->format == Format::PSEUDO_REDUCTION ||
169 instr->opcode == aco_opcode::p_create_vector ||
170 (flat && i == 1) || (instr->format == Format::MIMG && i == 1) ||
171 ((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 1);
172 check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
173 } else {
174 check(instr->operands[i].isFixed() || instr->operands[i].isTemp() || instr->operands[i].isConstant(), "Uninitialized Operand", instr.get());
175 }
176 }
177
178 /* check subdword definitions */
179 for (unsigned i = 0; i < instr->definitions.size(); i++) {
180 if (instr->definitions[i].regClass().is_subdword())
181 check(instr_can_access_subdword(instr) || instr->definitions[i].bytes() <= 4, "Only SDWA and Pseudo instructions can write subdword registers larger than 4 bytes", instr.get());
182 }
183
184 if (instr->isSALU() || instr->isVALU()) {
185 /* check literals */
186 Operand literal(s1);
187 for (unsigned i = 0; i < instr->operands.size(); i++)
188 {
189 Operand op = instr->operands[i];
190 if (!op.isLiteral())
191 continue;
192
193 check(instr->format == Format::SOP1 ||
194 instr->format == Format::SOP2 ||
195 instr->format == Format::SOPC ||
196 instr->format == Format::VOP1 ||
197 instr->format == Format::VOP2 ||
198 instr->format == Format::VOPC ||
199 (instr->isVOP3() && program->chip_class >= GFX10),
200 "Literal applied on wrong instruction format", instr.get());
201
202 check(literal.isUndefined() || (literal.size() == op.size() && literal.constantValue() == op.constantValue()), "Only 1 Literal allowed", instr.get());
203 literal = op;
204 check(!instr->isVALU() || instr->isVOP3() || i == 0 || i == 2, "Wrong source position for Literal argument", instr.get());
205 }
206
207 /* check num sgprs for VALU */
208 if (instr->isVALU()) {
209 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
210 instr->opcode == aco_opcode::v_lshrrev_b64 ||
211 instr->opcode == aco_opcode::v_ashrrev_i64;
212 unsigned const_bus_limit = 1;
213 if (program->chip_class >= GFX10 && !is_shift64)
214 const_bus_limit = 2;
215
216 uint32_t scalar_mask = instr->isVOP3() ? 0x7 : 0x5;
217 if (instr->isSDWA())
218 scalar_mask = program->chip_class >= GFX9 ? 0x7 : 0x4;
219
220 check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
221 (int) instr->format & (int) Format::VOPC ||
222 instr->opcode == aco_opcode::v_readfirstlane_b32 ||
223 instr->opcode == aco_opcode::v_readlane_b32 ||
224 instr->opcode == aco_opcode::v_readlane_b32_e64,
225 "Wrong Definition type for VALU instruction", instr.get());
226 unsigned num_sgprs = 0;
227 unsigned sgpr[] = {0, 0};
228 for (unsigned i = 0; i < instr->operands.size(); i++)
229 {
230 Operand op = instr->operands[i];
231 if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
232 instr->opcode == aco_opcode::v_readlane_b32 ||
233 instr->opcode == aco_opcode::v_readlane_b32_e64 ||
234 instr->opcode == aco_opcode::v_writelane_b32 ||
235 instr->opcode == aco_opcode::v_writelane_b32_e64) {
236 check(!op.isLiteral(), "No literal allowed on VALU instruction", instr.get());
237 check(i == 1 || (op.isTemp() && op.regClass() == v1), "Wrong Operand type for VALU instruction", instr.get());
238 continue;
239 }
240 if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
241 check(scalar_mask & (1 << i), "Wrong source position for SGPR argument", instr.get());
242
243 if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
244 if (num_sgprs < 2)
245 sgpr[num_sgprs++] = op.tempId();
246 }
247 }
248
249 if (op.isConstant() && !op.isLiteral())
250 check(scalar_mask & (1 << i), "Wrong source position for constant argument", instr.get());
251 }
252 check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit, "Too many SGPRs/literals", instr.get());
253 }
254
255 if (instr->format == Format::SOP1 || instr->format == Format::SOP2) {
256 check(instr->definitions[0].getTemp().type() == RegType::sgpr, "Wrong Definition type for SALU instruction", instr.get());
257 for (const Operand& op : instr->operands) {
258 check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
259 "Wrong Operand type for SALU instruction", instr.get());
260 }
261 }
262 }
263
264 switch (instr->format) {
265 case Format::PSEUDO: {
266 if (instr->opcode == aco_opcode::p_create_vector) {
267 unsigned size = 0;
268 for (const Operand& op : instr->operands) {
269 size += op.bytes();
270 }
271 check(size == instr->definitions[0].bytes(), "Definition size does not match operand sizes", instr.get());
272 if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
273 for (const Operand& op : instr->operands) {
274 check(op.isConstant() || op.regClass().type() == RegType::sgpr,
275 "Wrong Operand type for scalar vector", instr.get());
276 }
277 }
278 } else if (instr->opcode == aco_opcode::p_extract_vector) {
279 check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(), "Wrong Operand types", instr.get());
280 check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <= instr->operands[0].bytes(), "Index out of range", instr.get());
281 check(instr->definitions[0].getTemp().type() == RegType::vgpr || instr->operands[0].regClass().type() == RegType::sgpr,
282 "Cannot extract SGPR value from VGPR vector", instr.get());
283 } else if (instr->opcode == aco_opcode::p_parallelcopy) {
284 check(instr->definitions.size() == instr->operands.size(), "Number of Operands does not match number of Definitions", instr.get());
285 for (unsigned i = 0; i < instr->operands.size(); i++) {
286 if (instr->operands[i].isTemp())
287 check((instr->definitions[i].getTemp().type() == instr->operands[i].regClass().type()) ||
288 (instr->definitions[i].getTemp().type() == RegType::vgpr && instr->operands[i].regClass().type() == RegType::sgpr),
289 "Operand and Definition types do not match", instr.get());
290 }
291 } else if (instr->opcode == aco_opcode::p_phi) {
292 check(instr->operands.size() == block.logical_preds.size(), "Number of Operands does not match number of predecessors", instr.get());
293 check(instr->definitions[0].getTemp().type() == RegType::vgpr || instr->definitions[0].getTemp().regClass() == program->lane_mask, "Logical Phi Definition must be vgpr or divergent boolean", instr.get());
294 } else if (instr->opcode == aco_opcode::p_linear_phi) {
295 for (const Operand& op : instr->operands)
296 check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type", instr.get());
297 check(instr->operands.size() == block.linear_preds.size(), "Number of Operands does not match number of predecessors", instr.get());
298 }
299 break;
300 }
301 case Format::SMEM: {
302 if (instr->operands.size() >= 1)
303 check(instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr, "SMEM operands must be sgpr", instr.get());
304 if (instr->operands.size() >= 2)
305 check(instr->operands[1].isConstant() || (instr->operands[1].isTemp() && instr->operands[1].regClass().type() == RegType::sgpr),
306 "SMEM offset must be constant or sgpr", instr.get());
307 if (!instr->definitions.empty())
308 check(instr->definitions[0].getTemp().type() == RegType::sgpr, "SMEM result must be sgpr", instr.get());
309 break;
310 }
311 case Format::MTBUF:
312 case Format::MUBUF: {
313 check(instr->operands.size() > 1, "VMEM instructions must have at least one operand", instr.get());
314 check(instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr,
315 "VADDR must be in vgpr for VMEM instructions", instr.get());
316 check(instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr, "VMEM resource constant must be sgpr", instr.get());
317 check(instr->operands.size() < 4 || (instr->operands[3].isTemp() && instr->operands[3].regClass().type() == RegType::vgpr), "VMEM write data must be vgpr", instr.get());
318 break;
319 }
320 case Format::MIMG: {
321 check(instr->operands.size() == 3, "MIMG instructions must have exactly 3 operands", instr.get());
322 check(instr->operands[0].hasRegClass() && (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
323 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
324 if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr)
325 check(instr->operands[1].regClass() == s4, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
326 else if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr)
327 check((instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass() ||
328 instr->opcode == aco_opcode::image_atomic_cmpswap || instr->opcode == aco_opcode::image_atomic_fcmpswap),
329 "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr.get());
330 check(instr->operands[2].hasRegClass() && instr->operands[2].regClass().type() == RegType::vgpr,
331 "MIMG operands[2] (VADDR) must be VGPR", instr.get());
332 check(instr->definitions.empty() || (instr->definitions[0].isTemp() && instr->definitions[0].regClass().type() == RegType::vgpr),
333 "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
334 break;
335 }
336 case Format::DS: {
337 for (const Operand& op : instr->operands) {
338 check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
339 "Only VGPRs are valid DS instruction operands", instr.get());
340 }
341 if (!instr->definitions.empty())
342 check(instr->definitions[0].getTemp().type() == RegType::vgpr, "DS instruction must return VGPR", instr.get());
343 break;
344 }
345 case Format::EXP: {
346 for (unsigned i = 0; i < 4; i++)
347 check(instr->operands[i].hasRegClass() && instr->operands[i].regClass().type() == RegType::vgpr,
348 "Only VGPRs are valid Export arguments", instr.get());
349 break;
350 }
351 case Format::FLAT:
352 check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR", instr.get());
353 /* fallthrough */
354 case Format::GLOBAL:
355 case Format::SCRATCH: {
356 check(instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr, "FLAT/GLOBAL/SCRATCH address must be vgpr", instr.get());
357 check(instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr,
358 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
359 if (!instr->definitions.empty())
360 check(instr->definitions[0].getTemp().type() == RegType::vgpr, "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
361 else
362 check(instr->operands[2].regClass().type() == RegType::vgpr, "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
363 break;
364 }
365 default:
366 break;
367 }
368 }
369 }
370
371 /* validate CFG */
372 for (unsigned i = 0; i < program->blocks.size(); i++) {
373 Block& block = program->blocks[i];
374 check_block(block.index == i, "block.index must match actual index", &block);
375
376 /* predecessors/successors should be sorted */
377 for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
378 check_block(block.linear_preds[j] < block.linear_preds[j + 1], "linear predecessors must be sorted", &block);
379 for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
380 check_block(block.logical_preds[j] < block.logical_preds[j + 1], "logical predecessors must be sorted", &block);
381 for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
382 check_block(block.linear_succs[j] < block.linear_succs[j + 1], "linear successors must be sorted", &block);
383 for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
384 check_block(block.logical_succs[j] < block.logical_succs[j + 1], "logical successors must be sorted", &block);
385
386 /* critical edges are not allowed */
387 if (block.linear_preds.size() > 1) {
388 for (unsigned pred : block.linear_preds)
389 check_block(program->blocks[pred].linear_succs.size() == 1, "linear critical edges are not allowed", &program->blocks[pred]);
390 for (unsigned pred : block.logical_preds)
391 check_block(program->blocks[pred].logical_succs.size() == 1, "logical critical edges are not allowed", &program->blocks[pred]);
392 }
393 }
394
395 assert(is_valid);
396 }
397
398 /* RA validation */
399 namespace {
400
401 struct Location {
402 Location() : block(NULL), instr(NULL) {}
403
404 Block *block;
405 Instruction *instr; //NULL if it's the block's live-in
406 };
407
408 struct Assignment {
409 Location defloc;
410 Location firstloc;
411 PhysReg reg;
412 };
413
414 bool ra_fail(FILE *output, Location loc, Location loc2, const char *fmt, ...) {
415 va_list args;
416 va_start(args, fmt);
417 char msg[1024];
418 vsprintf(msg, fmt, args);
419 va_end(args);
420
421 fprintf(stderr, "RA error found at instruction in BB%d:\n", loc.block->index);
422 if (loc.instr) {
423 aco_print_instr(loc.instr, stderr);
424 fprintf(stderr, "\n%s", msg);
425 } else {
426 fprintf(stderr, "%s", msg);
427 }
428 if (loc2.block) {
429 fprintf(stderr, " in BB%d:\n", loc2.block->index);
430 aco_print_instr(loc2.instr, stderr);
431 }
432 fprintf(stderr, "\n\n");
433
434 return true;
435 }
436
437 } /* end namespace */
438
439 bool validate_ra(Program *program, const struct radv_nir_compiler_options *options, FILE *output) {
440 if (!(debug_flags & DEBUG_VALIDATE_RA))
441 return false;
442
443 bool err = false;
444 aco::live live_vars = aco::live_var_analysis(program, options);
445 std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
446
447 std::map<unsigned, Assignment> assignments;
448 for (Block& block : program->blocks) {
449 Location loc;
450 loc.block = &block;
451 for (aco_ptr<Instruction>& instr : block.instructions) {
452 if (instr->opcode == aco_opcode::p_phi) {
453 for (unsigned i = 0; i < instr->operands.size(); i++) {
454 if (instr->operands[i].isTemp() &&
455 instr->operands[i].getTemp().type() == RegType::sgpr &&
456 instr->operands[i].isFirstKill())
457 phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
458 }
459 }
460
461 loc.instr = instr.get();
462 for (unsigned i = 0; i < instr->operands.size(); i++) {
463 Operand& op = instr->operands[i];
464 if (!op.isTemp())
465 continue;
466 if (!op.isFixed())
467 err |= ra_fail(output, loc, Location(), "Operand %d is not assigned a register", i);
468 if (assignments.count(op.tempId()) && assignments[op.tempId()].reg != op.physReg())
469 err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an inconsistent register assignment with instruction", i);
470 if ((op.getTemp().type() == RegType::vgpr && op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
471 (op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < program->sgpr_limit))
472 err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
473 if (op.physReg() == vcc && !program->needs_vcc)
474 err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i);
475 if (!instr_can_access_subdword(instr) && op.regClass().is_subdword() && op.physReg().byte())
476 err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d must be aligned to a full register", i);
477 if (!assignments[op.tempId()].firstloc.block)
478 assignments[op.tempId()].firstloc = loc;
479 if (!assignments[op.tempId()].defloc.block)
480 assignments[op.tempId()].reg = op.physReg();
481 }
482
483 for (unsigned i = 0; i < instr->definitions.size(); i++) {
484 Definition& def = instr->definitions[i];
485 if (!def.isTemp())
486 continue;
487 if (!def.isFixed())
488 err |= ra_fail(output, loc, Location(), "Definition %d is not assigned a register", i);
489 if (assignments[def.tempId()].defloc.block)
490 err |= ra_fail(output, loc, assignments.at(def.tempId()).defloc, "Temporary %%%d also defined by instruction", def.tempId());
491 if ((def.getTemp().type() == RegType::vgpr && def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
492 (def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < program->sgpr_limit))
493 err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
494 if (def.physReg() == vcc && !program->needs_vcc)
495 err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i);
496 if (!instr_can_access_subdword(instr) && def.regClass().is_subdword() && def.physReg().byte())
497 err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d must be aligned to a full register", i);
498 if (!assignments[def.tempId()].firstloc.block)
499 assignments[def.tempId()].firstloc = loc;
500 assignments[def.tempId()].defloc = loc;
501 assignments[def.tempId()].reg = def.physReg();
502 }
503 }
504 }
505
506 for (Block& block : program->blocks) {
507 Location loc;
508 loc.block = &block;
509
510 std::array<unsigned, 2048> regs; /* register file in bytes */
511 regs.fill(0);
512
513 std::set<Temp> live;
514 live.insert(live_vars.live_out[block.index].begin(), live_vars.live_out[block.index].end());
515 /* remove killed p_phi sgpr operands */
516 for (Temp tmp : phi_sgpr_ops[block.index])
517 live.erase(tmp);
518
519 /* check live out */
520 for (Temp tmp : live) {
521 PhysReg reg = assignments.at(tmp.id()).reg;
522 for (unsigned i = 0; i < tmp.bytes(); i++) {
523 if (regs[reg.reg_b + i]) {
524 err |= ra_fail(output, loc, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i, tmp.id(), regs[reg.reg_b + i]);
525 }
526 regs[reg.reg_b + i] = tmp.id();
527 }
528 }
529 regs.fill(0);
530
531 for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
532 aco_ptr<Instruction>& instr = *it;
533
534 /* check killed p_phi sgpr operands */
535 if (instr->opcode == aco_opcode::p_logical_end) {
536 for (Temp tmp : phi_sgpr_ops[block.index]) {
537 PhysReg reg = assignments.at(tmp.id()).reg;
538 for (unsigned i = 0; i < tmp.bytes(); i++) {
539 if (regs[reg.reg_b + i])
540 err |= ra_fail(output, loc, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i, tmp.id(), regs[reg.reg_b + i]);
541 }
542 live.emplace(tmp);
543 }
544 }
545
546 for (const Definition& def : instr->definitions) {
547 if (!def.isTemp())
548 continue;
549 live.erase(def.getTemp());
550 }
551
552 /* don't count phi operands as live-in, since they are actually
553 * killed when they are copied at the predecessor */
554 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
555 for (const Operand& op : instr->operands) {
556 if (!op.isTemp())
557 continue;
558 live.insert(op.getTemp());
559 }
560 }
561 }
562
563 for (Temp tmp : live) {
564 PhysReg reg = assignments.at(tmp.id()).reg;
565 for (unsigned i = 0; i < tmp.bytes(); i++)
566 regs[reg.reg_b + i] = tmp.id();
567 }
568
569 for (aco_ptr<Instruction>& instr : block.instructions) {
570 loc.instr = instr.get();
571
572 /* remove killed p_phi operands from regs */
573 if (instr->opcode == aco_opcode::p_logical_end) {
574 for (Temp tmp : phi_sgpr_ops[block.index]) {
575 PhysReg reg = assignments.at(tmp.id()).reg;
576 for (unsigned i = 0; i < tmp.bytes(); i++)
577 regs[reg.reg_b + i] = 0;
578 }
579 }
580
581 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
582 for (const Operand& op : instr->operands) {
583 if (!op.isTemp())
584 continue;
585 if (op.isFirstKillBeforeDef()) {
586 for (unsigned j = 0; j < op.getTemp().bytes(); j++)
587 regs[op.physReg().reg_b + j] = 0;
588 }
589 }
590 }
591
592 for (unsigned i = 0; i < instr->definitions.size(); i++) {
593 Definition& def = instr->definitions[i];
594 if (!def.isTemp())
595 continue;
596 Temp tmp = def.getTemp();
597 PhysReg reg = assignments.at(tmp.id()).reg;
598 for (unsigned j = 0; j < tmp.bytes(); j++) {
599 if (regs[reg.reg_b + j])
600 err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
601 regs[reg.reg_b + j] = tmp.id();
602 }
603 if (def.regClass().is_subdword() && !instr_can_access_subdword(instr)) {
604 for (unsigned j = tmp.bytes(); j < 4; j++)
605 if (regs[reg.reg_b + j])
606 err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
607 }
608 }
609
610 for (const Definition& def : instr->definitions) {
611 if (!def.isTemp())
612 continue;
613 if (def.isKill()) {
614 for (unsigned j = 0; j < def.getTemp().bytes(); j++)
615 regs[def.physReg().reg_b + j] = 0;
616 }
617 }
618
619 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
620 for (const Operand& op : instr->operands) {
621 if (!op.isTemp())
622 continue;
623 if (op.isLateKill() && op.isFirstKill()) {
624 for (unsigned j = 0; j < op.getTemp().bytes(); j++)
625 regs[op.physReg().reg_b + j] = 0;
626 }
627 }
628 }
629 }
630 }
631
632 return err;
633 }
634 }