ace84db1018c3841421a6da4a767fa71627fb783
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
45 extern uint64_t debug_flags;
46
47 enum {
48 DEBUG_VALIDATE = 0x1,
49 DEBUG_VALIDATE_RA = 0x2,
50 DEBUG_PERFWARN = 0x4,
51 };
52
53 /**
54 * Representation of the instruction's microcode encoding format
55 * Note: Some Vector ALU Formats can be combined, such that:
56 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
57 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
58 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
59 *
60 * (*) The same is applicable for VOP1 and VOPC instructions.
61 */
62 enum class Format : std::uint16_t {
63 /* Pseudo Instruction Format */
64 PSEUDO = 0,
65 /* Scalar ALU & Control Formats */
66 SOP1 = 1,
67 SOP2 = 2,
68 SOPK = 3,
69 SOPP = 4,
70 SOPC = 5,
71 /* Scalar Memory Format */
72 SMEM = 6,
73 /* LDS/GDS Format */
74 DS = 8,
75 /* Vector Memory Buffer Formats */
76 MTBUF = 9,
77 MUBUF = 10,
78 /* Vector Memory Image Format */
79 MIMG = 11,
80 /* Export Format */
81 EXP = 12,
82 /* Flat Formats */
83 FLAT = 13,
84 GLOBAL = 14,
85 SCRATCH = 15,
86
87 PSEUDO_BRANCH = 16,
88 PSEUDO_BARRIER = 17,
89 PSEUDO_REDUCTION = 18,
90
91 /* Vector ALU Formats */
92 VOP1 = 1 << 8,
93 VOP2 = 1 << 9,
94 VOPC = 1 << 10,
95 VOP3 = 1 << 11,
96 VOP3A = 1 << 11,
97 VOP3B = 1 << 11,
98 VOP3P = 1 << 12,
99 /* Vector Parameter Interpolation Format */
100 VINTRP = 1 << 13,
101 DPP = 1 << 14,
102 SDWA = 1 << 15,
103 };
104
105 enum barrier_interaction : uint8_t {
106 barrier_none = 0,
107 barrier_buffer = 0x1,
108 barrier_image = 0x2,
109 barrier_atomic = 0x4,
110 barrier_shared = 0x8,
111 /* used for geometry shaders to ensure vertex data writes are before the
112 * GS_DONE s_sendmsg. */
113 barrier_gs_data = 0x10,
114 /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
115 barrier_gs_sendmsg = 0x20,
116 /* used by barriers. created by s_barrier */
117 barrier_barrier = 0x40,
118 barrier_count = 6,
119 };
120
121 enum fp_round {
122 fp_round_ne = 0,
123 fp_round_pi = 1,
124 fp_round_ni = 2,
125 fp_round_tz = 3,
126 };
127
128 enum fp_denorm {
129 /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
130 * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
131 fp_denorm_flush = 0x0,
132 fp_denorm_keep = 0x3,
133 };
134
135 struct float_mode {
136 /* matches encoding of the MODE register */
137 union {
138 struct {
139 fp_round round32:2;
140 fp_round round16_64:2;
141 unsigned denorm32:2;
142 unsigned denorm16_64:2;
143 };
144 uint8_t val = 0;
145 };
146 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
147 bool preserve_signed_zero_inf_nan32:1;
148 bool preserve_signed_zero_inf_nan16_64:1;
149 /* if false, optimizations which may remove denormal flushing can be done */
150 bool must_flush_denorms32:1;
151 bool must_flush_denorms16_64:1;
152 bool care_about_round32:1;
153 bool care_about_round16_64:1;
154
155 /* Returns true if instructions using the mode "other" can safely use the
156 * current one instead. */
157 bool canReplace(float_mode other) const noexcept {
158 return val == other.val &&
159 (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
160 (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
161 (must_flush_denorms32 || !other.must_flush_denorms32) &&
162 (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
163 (care_about_round32 || !other.care_about_round32) &&
164 (care_about_round16_64 || !other.care_about_round16_64);
165 }
166 };
167
168 constexpr Format asVOP3(Format format) {
169 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
170 };
171
172 enum class RegType {
173 none = 0,
174 sgpr,
175 vgpr,
176 linear_vgpr,
177 };
178
179 struct RegClass {
180
181 enum RC : uint8_t {
182 s1 = 1,
183 s2 = 2,
184 s3 = 3,
185 s4 = 4,
186 s6 = 6,
187 s8 = 8,
188 s16 = 16,
189 v1 = s1 | (1 << 5),
190 v2 = s2 | (1 << 5),
191 v3 = s3 | (1 << 5),
192 v4 = s4 | (1 << 5),
193 v5 = 5 | (1 << 5),
194 v6 = 6 | (1 << 5),
195 v7 = 7 | (1 << 5),
196 v8 = 8 | (1 << 5),
197 /* these are used for WWM and spills to vgpr */
198 v1_linear = v1 | (1 << 6),
199 v2_linear = v2 | (1 << 6),
200 };
201
202 RegClass() = default;
203 constexpr RegClass(RC rc)
204 : rc(rc) {}
205 constexpr RegClass(RegType type, unsigned size)
206 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
207
208 constexpr operator RC() const { return rc; }
209 explicit operator bool() = delete;
210
211 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
212 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
213 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
214 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
215
216 private:
217 RC rc;
218 };
219
220 /* transitional helper expressions */
221 static constexpr RegClass s1{RegClass::s1};
222 static constexpr RegClass s2{RegClass::s2};
223 static constexpr RegClass s3{RegClass::s3};
224 static constexpr RegClass s4{RegClass::s4};
225 static constexpr RegClass s8{RegClass::s8};
226 static constexpr RegClass s16{RegClass::s16};
227 static constexpr RegClass v1{RegClass::v1};
228 static constexpr RegClass v2{RegClass::v2};
229 static constexpr RegClass v3{RegClass::v3};
230 static constexpr RegClass v4{RegClass::v4};
231 static constexpr RegClass v5{RegClass::v5};
232 static constexpr RegClass v6{RegClass::v6};
233 static constexpr RegClass v7{RegClass::v7};
234 static constexpr RegClass v8{RegClass::v8};
235
236 /**
237 * Temp Class
238 * Each temporary virtual register has a
239 * register class (i.e. size and type)
240 * and SSA id.
241 */
242 struct Temp {
243 Temp() = default;
244 constexpr Temp(uint32_t id, RegClass cls) noexcept
245 : id_(id), reg_class(cls) {}
246
247 constexpr uint32_t id() const noexcept { return id_; }
248 constexpr RegClass regClass() const noexcept { return reg_class; }
249
250 constexpr unsigned size() const noexcept { return reg_class.size(); }
251 constexpr RegType type() const noexcept { return reg_class.type(); }
252 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
253
254 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
255 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
256 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
257
258 private:
259 uint32_t id_:24;
260 RegClass reg_class;
261 };
262
263 /**
264 * PhysReg
265 * Represents the physical register for each
266 * Operand and Definition.
267 */
268 struct PhysReg {
269 constexpr PhysReg() = default;
270 explicit constexpr PhysReg(unsigned r) : reg(r) {}
271 constexpr operator unsigned() const { return reg; }
272
273 uint16_t reg = 0;
274 };
275
276 /* helper expressions for special registers */
277 static constexpr PhysReg m0{124};
278 static constexpr PhysReg vcc{106};
279 static constexpr PhysReg vcc_hi{107};
280 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
281 static constexpr PhysReg exec{126};
282 static constexpr PhysReg exec_lo{126};
283 static constexpr PhysReg exec_hi{127};
284 static constexpr PhysReg vccz{251};
285 static constexpr PhysReg execz{252};
286 static constexpr PhysReg scc{253};
287
288 /**
289 * Operand Class
290 * Initially, each Operand refers to either
291 * a temporary virtual register
292 * or to a constant value
293 * Temporary registers get mapped to physical register during RA
294 * Constant values are inlined into the instruction sequence.
295 */
296 class Operand final
297 {
298 public:
299 constexpr Operand()
300 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
301 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
302 isLateKill_(false) {}
303
304 explicit Operand(Temp r) noexcept
305 {
306 data_.temp = r;
307 if (r.id()) {
308 isTemp_ = true;
309 } else {
310 isUndef_ = true;
311 setFixed(PhysReg{128});
312 }
313 };
314 explicit Operand(uint32_t v, bool is64bit = false) noexcept
315 {
316 data_.i = v;
317 isConstant_ = true;
318 is64BitConst_ = is64bit;
319 if (v <= 64)
320 setFixed(PhysReg{128 + v});
321 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
322 setFixed(PhysReg{192 - v});
323 else if (v == 0x3f000000) /* 0.5 */
324 setFixed(PhysReg{240});
325 else if (v == 0xbf000000) /* -0.5 */
326 setFixed(PhysReg{241});
327 else if (v == 0x3f800000) /* 1.0 */
328 setFixed(PhysReg{242});
329 else if (v == 0xbf800000) /* -1.0 */
330 setFixed(PhysReg{243});
331 else if (v == 0x40000000) /* 2.0 */
332 setFixed(PhysReg{244});
333 else if (v == 0xc0000000) /* -2.0 */
334 setFixed(PhysReg{245});
335 else if (v == 0x40800000) /* 4.0 */
336 setFixed(PhysReg{246});
337 else if (v == 0xc0800000) /* -4.0 */
338 setFixed(PhysReg{247});
339 else { /* Literal Constant */
340 assert(!is64bit && "attempt to create a 64-bit literal constant");
341 setFixed(PhysReg{255});
342 }
343 };
344 explicit Operand(uint64_t v) noexcept
345 {
346 isConstant_ = true;
347 is64BitConst_ = true;
348 if (v <= 64) {
349 data_.i = (uint32_t) v;
350 setFixed(PhysReg{128 + (uint32_t) v});
351 } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
352 data_.i = (uint32_t) v;
353 setFixed(PhysReg{192 - (uint32_t) v});
354 } else if (v == 0x3FE0000000000000) { /* 0.5 */
355 data_.i = 0x3f000000;
356 setFixed(PhysReg{240});
357 } else if (v == 0xBFE0000000000000) { /* -0.5 */
358 data_.i = 0xbf000000;
359 setFixed(PhysReg{241});
360 } else if (v == 0x3FF0000000000000) { /* 1.0 */
361 data_.i = 0x3f800000;
362 setFixed(PhysReg{242});
363 } else if (v == 0xBFF0000000000000) { /* -1.0 */
364 data_.i = 0xbf800000;
365 setFixed(PhysReg{243});
366 } else if (v == 0x4000000000000000) { /* 2.0 */
367 data_.i = 0x40000000;
368 setFixed(PhysReg{244});
369 } else if (v == 0xC000000000000000) { /* -2.0 */
370 data_.i = 0xc0000000;
371 setFixed(PhysReg{245});
372 } else if (v == 0x4010000000000000) { /* 4.0 */
373 data_.i = 0x40800000;
374 setFixed(PhysReg{246});
375 } else if (v == 0xC010000000000000) { /* -4.0 */
376 data_.i = 0xc0800000;
377 setFixed(PhysReg{247});
378 } else { /* Literal Constant: we don't know if it is a long or double.*/
379 isConstant_ = 0;
380 assert(false && "attempt to create a 64-bit literal constant");
381 }
382 };
383 explicit Operand(RegClass type) noexcept
384 {
385 isUndef_ = true;
386 data_.temp = Temp(0, type);
387 setFixed(PhysReg{128});
388 };
389 explicit Operand(PhysReg reg, RegClass type) noexcept
390 {
391 data_.temp = Temp(0, type);
392 setFixed(reg);
393 }
394
395 constexpr bool isTemp() const noexcept
396 {
397 return isTemp_;
398 }
399
400 constexpr void setTemp(Temp t) noexcept {
401 assert(!isConstant_);
402 isTemp_ = true;
403 data_.temp = t;
404 }
405
406 constexpr Temp getTemp() const noexcept
407 {
408 return data_.temp;
409 }
410
411 constexpr uint32_t tempId() const noexcept
412 {
413 return data_.temp.id();
414 }
415
416 constexpr bool hasRegClass() const noexcept
417 {
418 return isTemp() || isUndefined();
419 }
420
421 constexpr RegClass regClass() const noexcept
422 {
423 return data_.temp.regClass();
424 }
425
426 constexpr unsigned size() const noexcept
427 {
428 if (isConstant())
429 return is64BitConst_ ? 2 : 1;
430 else
431 return data_.temp.size();
432 }
433
434 constexpr bool isFixed() const noexcept
435 {
436 return isFixed_;
437 }
438
439 constexpr PhysReg physReg() const noexcept
440 {
441 return reg_;
442 }
443
444 constexpr void setFixed(PhysReg reg) noexcept
445 {
446 isFixed_ = reg != unsigned(-1);
447 reg_ = reg;
448 }
449
450 constexpr bool isConstant() const noexcept
451 {
452 return isConstant_;
453 }
454
455 constexpr bool isLiteral() const noexcept
456 {
457 return isConstant() && reg_ == 255;
458 }
459
460 constexpr bool isUndefined() const noexcept
461 {
462 return isUndef_;
463 }
464
465 constexpr uint32_t constantValue() const noexcept
466 {
467 return data_.i;
468 }
469
470 constexpr bool constantEquals(uint32_t cmp) const noexcept
471 {
472 return isConstant() && constantValue() == cmp;
473 }
474
475 constexpr uint64_t constantValue64(bool signext=false) const noexcept
476 {
477 if (is64BitConst_) {
478 if (reg_.reg <= 192)
479 return reg_.reg - 128;
480 else if (reg_.reg <= 208)
481 return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
482
483 switch (reg_.reg) {
484 case 240:
485 return 0x3FE0000000000000;
486 case 241:
487 return 0xBFE0000000000000;
488 case 242:
489 return 0x3FF0000000000000;
490 case 243:
491 return 0xBFF0000000000000;
492 case 244:
493 return 0x4000000000000000;
494 case 245:
495 return 0xC000000000000000;
496 case 246:
497 return 0x4010000000000000;
498 case 247:
499 return 0xC010000000000000;
500 }
501 }
502 return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
503 }
504
505 /* Indicates that the killed operand's live range intersects with the
506 * instruction's definitions. Unlike isKill() and isFirstKill(), this is
507 * not set by liveness analysis. */
508 constexpr void setLateKill(bool flag) noexcept
509 {
510 isLateKill_ = flag;
511 }
512
513 constexpr bool isLateKill() const noexcept
514 {
515 return isLateKill_;
516 }
517
518 constexpr void setKill(bool flag) noexcept
519 {
520 isKill_ = flag;
521 if (!flag)
522 setFirstKill(false);
523 }
524
525 constexpr bool isKill() const noexcept
526 {
527 return isKill_ || isFirstKill();
528 }
529
530 constexpr void setFirstKill(bool flag) noexcept
531 {
532 isFirstKill_ = flag;
533 if (flag)
534 setKill(flag);
535 }
536
537 /* When there are multiple operands killing the same temporary,
538 * isFirstKill() is only returns true for the first one. */
539 constexpr bool isFirstKill() const noexcept
540 {
541 return isFirstKill_;
542 }
543
544 constexpr bool isKillBeforeDef() const noexcept
545 {
546 return isKill() && !isLateKill();
547 }
548
549 constexpr bool isFirstKillBeforeDef() const noexcept
550 {
551 return isFirstKill() && !isLateKill();
552 }
553
554 constexpr bool operator == (Operand other) const noexcept
555 {
556 if (other.size() != size())
557 return false;
558 if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
559 return false;
560 if (isFixed() && other.isFixed() && physReg() != other.physReg())
561 return false;
562 if (isLiteral())
563 return other.isLiteral() && other.constantValue() == constantValue();
564 else if (isConstant())
565 return other.isConstant() && other.physReg() == physReg();
566 else if (isUndefined())
567 return other.isUndefined() && other.regClass() == regClass();
568 else
569 return other.isTemp() && other.getTemp() == getTemp();
570 }
571 private:
572 union {
573 uint32_t i;
574 float f;
575 Temp temp = Temp(0, s1);
576 } data_;
577 PhysReg reg_;
578 union {
579 struct {
580 uint8_t isTemp_:1;
581 uint8_t isFixed_:1;
582 uint8_t isConstant_:1;
583 uint8_t isKill_:1;
584 uint8_t isUndef_:1;
585 uint8_t isFirstKill_:1;
586 uint8_t is64BitConst_:1;
587 uint8_t isLateKill_:1;
588 };
589 /* can't initialize bit-fields in c++11, so work around using a union */
590 uint8_t control_ = 0;
591 };
592 };
593
594 /**
595 * Definition Class
596 * Definitions are the results of Instructions
597 * and refer to temporary virtual registers
598 * which are later mapped to physical registers
599 */
600 class Definition final
601 {
602 public:
603 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
604 Definition(uint32_t index, RegClass type) noexcept
605 : temp(index, type) {}
606 explicit Definition(Temp tmp) noexcept
607 : temp(tmp) {}
608 Definition(PhysReg reg, RegClass type) noexcept
609 : temp(Temp(0, type))
610 {
611 setFixed(reg);
612 }
613 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
614 : temp(Temp(tmpId, type))
615 {
616 setFixed(reg);
617 }
618
619 constexpr bool isTemp() const noexcept
620 {
621 return tempId() > 0;
622 }
623
624 constexpr Temp getTemp() const noexcept
625 {
626 return temp;
627 }
628
629 constexpr uint32_t tempId() const noexcept
630 {
631 return temp.id();
632 }
633
634 constexpr void setTemp(Temp t) noexcept {
635 temp = t;
636 }
637
638 constexpr RegClass regClass() const noexcept
639 {
640 return temp.regClass();
641 }
642
643 constexpr unsigned size() const noexcept
644 {
645 return temp.size();
646 }
647
648 constexpr bool isFixed() const noexcept
649 {
650 return isFixed_;
651 }
652
653 constexpr PhysReg physReg() const noexcept
654 {
655 return reg_;
656 }
657
658 constexpr void setFixed(PhysReg reg) noexcept
659 {
660 isFixed_ = 1;
661 reg_ = reg;
662 }
663
664 constexpr void setHint(PhysReg reg) noexcept
665 {
666 hasHint_ = 1;
667 reg_ = reg;
668 }
669
670 constexpr bool hasHint() const noexcept
671 {
672 return hasHint_;
673 }
674
675 constexpr void setKill(bool flag) noexcept
676 {
677 isKill_ = flag;
678 }
679
680 constexpr bool isKill() const noexcept
681 {
682 return isKill_;
683 }
684
685 private:
686 Temp temp = Temp(0, s1);
687 PhysReg reg_;
688 union {
689 struct {
690 uint8_t isFixed_:1;
691 uint8_t hasHint_:1;
692 uint8_t isKill_:1;
693 };
694 /* can't initialize bit-fields in c++11, so work around using a union */
695 uint8_t control_ = 0;
696 };
697 };
698
699 class Block;
700
701 struct Instruction {
702 aco_opcode opcode;
703 Format format;
704 uint32_t pass_flags;
705
706 aco::span<Operand> operands;
707 aco::span<Definition> definitions;
708
709 constexpr bool isVALU() const noexcept
710 {
711 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
712 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
713 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
714 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
715 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
716 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
717 }
718
719 constexpr bool isSALU() const noexcept
720 {
721 return format == Format::SOP1 ||
722 format == Format::SOP2 ||
723 format == Format::SOPC ||
724 format == Format::SOPK ||
725 format == Format::SOPP;
726 }
727
728 constexpr bool isVMEM() const noexcept
729 {
730 return format == Format::MTBUF ||
731 format == Format::MUBUF ||
732 format == Format::MIMG;
733 }
734
735 constexpr bool isDPP() const noexcept
736 {
737 return (uint16_t) format & (uint16_t) Format::DPP;
738 }
739
740 constexpr bool isVOP3() const noexcept
741 {
742 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
743 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
744 format == Format::VOP3P;
745 }
746
747 constexpr bool isSDWA() const noexcept
748 {
749 return (uint16_t) format & (uint16_t) Format::SDWA;
750 }
751
752 constexpr bool isFlatOrGlobal() const noexcept
753 {
754 return format == Format::FLAT || format == Format::GLOBAL;
755 }
756
757 constexpr bool usesModifiers() const noexcept;
758
759 constexpr bool reads_exec() const noexcept
760 {
761 for (const Operand& op : operands) {
762 if (op.isFixed() && op.physReg() == exec)
763 return true;
764 }
765 return false;
766 }
767 };
768
769 struct SOPK_instruction : public Instruction {
770 uint16_t imm;
771 };
772
773 struct SOPP_instruction : public Instruction {
774 uint32_t imm;
775 int block;
776 };
777
778 struct SOPC_instruction : public Instruction {
779 };
780
781 struct SOP1_instruction : public Instruction {
782 };
783
784 struct SOP2_instruction : public Instruction {
785 };
786
787 /**
788 * Scalar Memory Format:
789 * For s_(buffer_)load_dword*:
790 * Operand(0): SBASE - SGPR-pair which provides base address
791 * Operand(1): Offset - immediate (un)signed offset or SGPR
792 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
793 * Operand(n-1): SOffset - SGPR offset (Vega only)
794 *
795 * Having no operands is also valid for instructions such as s_dcache_inv.
796 *
797 */
798 struct SMEM_instruction : public Instruction {
799 bool glc : 1; /* VI+: globally coherent */
800 bool dlc : 1; /* NAVI: device level coherent */
801 bool nv : 1; /* VEGA only: Non-volatile */
802 bool can_reorder : 1;
803 bool disable_wqm : 1;
804 barrier_interaction barrier;
805 };
806
807 struct VOP1_instruction : public Instruction {
808 };
809
810 struct VOP2_instruction : public Instruction {
811 };
812
813 struct VOPC_instruction : public Instruction {
814 };
815
816 struct VOP3A_instruction : public Instruction {
817 bool abs[3];
818 bool neg[3];
819 uint8_t opsel : 4;
820 uint8_t omod : 2;
821 bool clamp : 1;
822 };
823
824 /**
825 * Data Parallel Primitives Format:
826 * This format can be used for VOP1, VOP2 or VOPC instructions.
827 * The swizzle applies to the src0 operand.
828 *
829 */
830 struct DPP_instruction : public Instruction {
831 bool abs[2];
832 bool neg[2];
833 uint16_t dpp_ctrl;
834 uint8_t row_mask : 4;
835 uint8_t bank_mask : 4;
836 bool bound_ctrl : 1;
837 };
838
839 struct Interp_instruction : public Instruction {
840 uint8_t attribute;
841 uint8_t component;
842 };
843
844 /**
845 * Local and Global Data Sharing instructions
846 * Operand(0): ADDR - VGPR which supplies the address.
847 * Operand(1): DATA0 - First data VGPR.
848 * Operand(2): DATA1 - Second data VGPR.
849 * Operand(n-1): M0 - LDS size.
850 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
851 *
852 */
853 struct DS_instruction : public Instruction {
854 int16_t offset0;
855 int8_t offset1;
856 bool gds;
857 };
858
859 /**
860 * Vector Memory Untyped-buffer Instructions
861 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
862 * Operand(1): VADDR - Address source. Can carry an index and/or offset
863 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
864 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
865 *
866 */
867 struct MUBUF_instruction : public Instruction {
868 uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
869 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
870 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
871 bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
872 bool glc : 1; /* globally coherent */
873 bool dlc : 1; /* NAVI: device level coherent */
874 bool slc : 1; /* system level coherent */
875 bool tfe : 1; /* texture fail enable */
876 bool lds : 1; /* Return read-data to LDS instead of VGPRs */
877 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
878 bool can_reorder : 1;
879 barrier_interaction barrier;
880 };
881
882 /**
883 * Vector Memory Typed-buffer Instructions
884 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
885 * Operand(1): VADDR - Address source. Can carry an index and/or offset
886 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
887 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
888 *
889 */
890 struct MTBUF_instruction : public Instruction {
891 uint16_t offset; /* Unsigned byte offset - 12 bit */
892 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
893 uint8_t nfmt : 3; /* Numeric format of data in memory */
894 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
895 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
896 bool glc : 1; /* globally coherent */
897 bool dlc : 1; /* NAVI: device level coherent */
898 bool slc : 1; /* system level coherent */
899 bool tfe : 1; /* texture fail enable */
900 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
901 bool can_reorder : 1;
902 barrier_interaction barrier;
903 };
904
905 /**
906 * Vector Memory Image Instructions
907 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
908 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
909 * or VDATA - Vector GPR for write data.
910 * Operand(2): VADDR - Address source. Can carry an offset or an index.
911 * Definition(0): VDATA - Vector GPR for read result.
912 *
913 */
914 struct MIMG_instruction : public Instruction {
915 uint8_t dmask; /* Data VGPR enable mask */
916 uint8_t dim : 3; /* NAVI: dimensionality */
917 bool unrm : 1; /* Force address to be un-normalized */
918 bool dlc : 1; /* NAVI: device level coherent */
919 bool glc : 1; /* globally coherent */
920 bool slc : 1; /* system level coherent */
921 bool tfe : 1; /* texture fail enable */
922 bool da : 1; /* declare an array */
923 bool lwe : 1; /* Force data to be un-normalized */
924 bool r128 : 1; /* NAVI: Texture resource size */
925 bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
926 bool d16 : 1; /* Convert 32-bit data to 16-bit data */
927 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
928 bool can_reorder : 1;
929 barrier_interaction barrier;
930 };
931
932 /**
933 * Flat/Scratch/Global Instructions
934 * Operand(0): ADDR
935 * Operand(1): SADDR
936 * Operand(2) / Definition(0): DATA/VDST
937 *
938 */
939 struct FLAT_instruction : public Instruction {
940 uint16_t offset; /* Vega/Navi only */
941 bool slc : 1; /* system level coherent */
942 bool glc : 1; /* globally coherent */
943 bool dlc : 1; /* NAVI: device level coherent */
944 bool lds : 1;
945 bool nv : 1;
946 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
947 bool can_reorder : 1;
948 barrier_interaction barrier;
949 };
950
951 struct Export_instruction : public Instruction {
952 uint8_t enabled_mask;
953 uint8_t dest;
954 bool compressed : 1;
955 bool done : 1;
956 bool valid_mask : 1;
957 };
958
959 struct Pseudo_instruction : public Instruction {
960 bool tmp_in_scc;
961 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
962 };
963
964 struct Pseudo_branch_instruction : public Instruction {
965 /* target[0] is the block index of the branch target.
966 * For conditional branches, target[1] contains the fall-through alternative.
967 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
968 */
969 uint32_t target[2];
970 };
971
972 struct Pseudo_barrier_instruction : public Instruction {
973 };
974
975 enum ReduceOp {
976 iadd32, iadd64,
977 imul32, imul64,
978 fadd32, fadd64,
979 fmul32, fmul64,
980 imin32, imin64,
981 imax32, imax64,
982 umin32, umin64,
983 umax32, umax64,
984 fmin32, fmin64,
985 fmax32, fmax64,
986 iand32, iand64,
987 ior32, ior64,
988 ixor32, ixor64,
989 gfx10_wave64_bpermute
990 };
991
992 /**
993 * Subgroup Reduction Instructions, everything except for the data to be
994 * reduced and the result as inserted by setup_reduce_temp().
995 * Operand(0): data to be reduced
996 * Operand(1): reduce temporary
997 * Operand(2): vector temporary
998 * Definition(0): result
999 * Definition(1): scalar temporary
1000 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1001 * Definition(3): scc clobber
1002 * Definition(4): vcc clobber
1003 *
1004 */
1005 struct Pseudo_reduction_instruction : public Instruction {
1006 ReduceOp reduce_op;
1007 unsigned cluster_size; // must be 0 for scans
1008 };
1009
1010 struct instr_deleter_functor {
1011 void operator()(void* p) {
1012 free(p);
1013 }
1014 };
1015
1016 template<typename T>
1017 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1018
1019 template<typename T>
1020 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
1021 {
1022 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
1023 char *data = (char*) calloc(1, size);
1024 T* inst = (T*) data;
1025
1026 inst->opcode = opcode;
1027 inst->format = format;
1028
1029 uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
1030 inst->operands = aco::span<Operand>(operands_offset, num_operands);
1031 uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
1032 inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
1033
1034 return inst;
1035 }
1036
1037 constexpr bool Instruction::usesModifiers() const noexcept
1038 {
1039 if (isDPP() || isSDWA())
1040 return true;
1041 if (!isVOP3())
1042 return false;
1043 const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
1044 for (unsigned i = 0; i < operands.size(); i++) {
1045 if (vop3->abs[i] || vop3->neg[i])
1046 return true;
1047 }
1048 return vop3->opsel || vop3->clamp || vop3->omod;
1049 }
1050
1051 constexpr bool is_phi(Instruction* instr)
1052 {
1053 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1054 }
1055
1056 static inline bool is_phi(aco_ptr<Instruction>& instr)
1057 {
1058 return is_phi(instr.get());
1059 }
1060
1061 barrier_interaction get_barrier_interaction(Instruction* instr);
1062
1063 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
1064
1065 enum block_kind {
1066 /* uniform indicates that leaving this block,
1067 * all actives lanes stay active */
1068 block_kind_uniform = 1 << 0,
1069 block_kind_top_level = 1 << 1,
1070 block_kind_loop_preheader = 1 << 2,
1071 block_kind_loop_header = 1 << 3,
1072 block_kind_loop_exit = 1 << 4,
1073 block_kind_continue = 1 << 5,
1074 block_kind_break = 1 << 6,
1075 block_kind_continue_or_break = 1 << 7,
1076 block_kind_discard = 1 << 8,
1077 block_kind_branch = 1 << 9,
1078 block_kind_merge = 1 << 10,
1079 block_kind_invert = 1 << 11,
1080 block_kind_uses_discard_if = 1 << 12,
1081 block_kind_needs_lowering = 1 << 13,
1082 block_kind_uses_demote = 1 << 14,
1083 block_kind_export_end = 1 << 15,
1084 };
1085
1086
1087 struct RegisterDemand {
1088 constexpr RegisterDemand() = default;
1089 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1090 : vgpr{v}, sgpr{s} {}
1091 int16_t vgpr = 0;
1092 int16_t sgpr = 0;
1093
1094 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1095 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1096 }
1097
1098 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1099 return vgpr > other.vgpr || sgpr > other.sgpr;
1100 }
1101
1102 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1103 if (t.type() == RegType::sgpr)
1104 return RegisterDemand( vgpr, sgpr + t.size() );
1105 else
1106 return RegisterDemand( vgpr + t.size(), sgpr );
1107 }
1108
1109 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1110 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1111 }
1112
1113 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1114 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1115 }
1116
1117 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1118 vgpr += other.vgpr;
1119 sgpr += other.sgpr;
1120 return *this;
1121 }
1122
1123 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1124 vgpr -= other.vgpr;
1125 sgpr -= other.sgpr;
1126 return *this;
1127 }
1128
1129 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1130 if (t.type() == RegType::sgpr)
1131 sgpr += t.size();
1132 else
1133 vgpr += t.size();
1134 return *this;
1135 }
1136
1137 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1138 if (t.type() == RegType::sgpr)
1139 sgpr -= t.size();
1140 else
1141 vgpr -= t.size();
1142 return *this;
1143 }
1144
1145 constexpr void update(const RegisterDemand other) noexcept {
1146 vgpr = std::max(vgpr, other.vgpr);
1147 sgpr = std::max(sgpr, other.sgpr);
1148 }
1149
1150 };
1151
1152 /* CFG */
1153 struct Block {
1154 float_mode fp_mode;
1155 unsigned index;
1156 unsigned offset = 0;
1157 std::vector<aco_ptr<Instruction>> instructions;
1158 std::vector<unsigned> logical_preds;
1159 std::vector<unsigned> linear_preds;
1160 std::vector<unsigned> logical_succs;
1161 std::vector<unsigned> linear_succs;
1162 RegisterDemand register_demand = RegisterDemand();
1163 uint16_t loop_nest_depth = 0;
1164 uint16_t kind = 0;
1165 int logical_idom = -1;
1166 int linear_idom = -1;
1167 Temp live_out_exec = Temp();
1168
1169 /* this information is needed for predecessors to blocks with phis when
1170 * moving out of ssa */
1171 bool scc_live_out = false;
1172 PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1173
1174 Block(unsigned idx) : index(idx) {}
1175 Block() : index(0) {}
1176 };
1177
1178 using Stage = uint16_t;
1179
1180 /* software stages */
1181 static constexpr Stage sw_vs = 1 << 0;
1182 static constexpr Stage sw_gs = 1 << 1;
1183 static constexpr Stage sw_tcs = 1 << 2;
1184 static constexpr Stage sw_tes = 1 << 3;
1185 static constexpr Stage sw_fs = 1 << 4;
1186 static constexpr Stage sw_cs = 1 << 5;
1187 static constexpr Stage sw_gs_copy = 1 << 6;
1188 static constexpr Stage sw_mask = 0x7f;
1189
1190 /* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
1191 static constexpr Stage hw_vs = 1 << 7;
1192 static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
1193 static constexpr Stage hw_gs = 1 << 9;
1194 static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
1195 static constexpr Stage hw_hs = 1 << 11;
1196 static constexpr Stage hw_fs = 1 << 12;
1197 static constexpr Stage hw_cs = 1 << 13;
1198 static constexpr Stage hw_mask = 0x7f << 7;
1199
1200 /* possible settings of Program::stage */
1201 static constexpr Stage vertex_vs = sw_vs | hw_vs;
1202 static constexpr Stage fragment_fs = sw_fs | hw_fs;
1203 static constexpr Stage compute_cs = sw_cs | hw_cs;
1204 static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
1205 static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
1206 /* GFX10/NGG */
1207 static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
1208 static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1209 static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1210 static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1211 /* GFX9 (and GFX10 if NGG isn't used) */
1212 static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1213 static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1214 static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1215 /* pre-GFX9 */
1216 static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
1217 static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
1218 static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
1219 static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
1220 static constexpr Stage geometry_gs = sw_gs | hw_gs;
1221
1222 class Program final {
1223 public:
1224 float_mode next_fp_mode;
1225 std::vector<Block> blocks;
1226 RegisterDemand max_reg_demand = RegisterDemand();
1227 uint16_t num_waves = 0;
1228 uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1229 ac_shader_config* config;
1230 struct radv_shader_info *info;
1231 enum chip_class chip_class;
1232 enum radeon_family family;
1233 unsigned wave_size;
1234 RegClass lane_mask;
1235 Stage stage; /* Stage */
1236 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1237 bool needs_wqm = false; /* there exists a p_wqm instruction */
1238 bool wb_smem_l1_on_end = false;
1239
1240 std::vector<uint8_t> constant_data;
1241 Temp private_segment_buffer;
1242 Temp scratch_offset;
1243
1244 uint16_t min_waves = 0;
1245 uint16_t lds_alloc_granule;
1246 uint32_t lds_limit; /* in bytes */
1247 bool has_16bank_lds;
1248 uint16_t vgpr_limit;
1249 uint16_t sgpr_limit;
1250 uint16_t physical_sgprs;
1251 uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1252 uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1253 unsigned workgroup_size; /* if known; otherwise UINT_MAX */
1254
1255 bool xnack_enabled = false;
1256
1257 bool needs_vcc = false;
1258 bool needs_flat_scr = false;
1259
1260 uint32_t allocateId()
1261 {
1262 assert(allocationID <= 16777215);
1263 return allocationID++;
1264 }
1265
1266 uint32_t peekAllocationId()
1267 {
1268 return allocationID;
1269 }
1270
1271 void setAllocationId(uint32_t id)
1272 {
1273 allocationID = id;
1274 }
1275
1276 Block* create_and_insert_block() {
1277 blocks.emplace_back(blocks.size());
1278 blocks.back().fp_mode = next_fp_mode;
1279 return &blocks.back();
1280 }
1281
1282 Block* insert_block(Block&& block) {
1283 block.index = blocks.size();
1284 block.fp_mode = next_fp_mode;
1285 blocks.emplace_back(std::move(block));
1286 return &blocks.back();
1287 }
1288
1289 private:
1290 uint32_t allocationID = 1;
1291 };
1292
1293 struct live {
1294 /* live temps out per block */
1295 std::vector<std::set<Temp>> live_out;
1296 /* register demand (sgpr/vgpr) per instruction per block */
1297 std::vector<std::vector<RegisterDemand>> register_demand;
1298 };
1299
1300 void select_program(Program *program,
1301 unsigned shader_count,
1302 struct nir_shader *const *shaders,
1303 ac_shader_config* config,
1304 struct radv_shader_args *args);
1305 void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
1306 ac_shader_config* config,
1307 struct radv_shader_args *args);
1308
1309 void lower_wqm(Program* program, live& live_vars,
1310 const struct radv_nir_compiler_options *options);
1311 void lower_bool_phis(Program* program);
1312 void calc_min_waves(Program* program);
1313 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1314 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1315 std::vector<uint16_t> dead_code_analysis(Program *program);
1316 void dominator_tree(Program* program);
1317 void insert_exec_mask(Program *program);
1318 void value_numbering(Program* program);
1319 void optimize(Program* program);
1320 void setup_reduce_temp(Program* program);
1321 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1322 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1323 void ssa_elimination(Program* program);
1324 void lower_to_hw_instr(Program* program);
1325 void schedule_program(Program* program, live& live_vars);
1326 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1327 void insert_wait_states(Program* program);
1328 void insert_NOPs(Program* program);
1329 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1330 void print_asm(Program *program, std::vector<uint32_t>& binary,
1331 unsigned exec_size, std::ostream& out);
1332 void validate(Program* program, FILE *output);
1333 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1334 #ifndef NDEBUG
1335 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1336 #else
1337 #define perfwarn(program, cond, msg, ...) do {} while(0)
1338 #endif
1339
1340 void aco_print_instr(Instruction *instr, FILE *output);
1341 void aco_print_program(Program *program, FILE *output);
1342
1343 /* utilities for dealing with register demand */
1344 RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
1345 RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
1346 RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);
1347
1348 /* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
1349 uint16_t get_extra_sgprs(Program *program);
1350
1351 /* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
1352 uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
1353 uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
1354
1355 /* return number of addressable sgprs/vgprs for max_waves */
1356 uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
1357 uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1358
1359 typedef struct {
1360 const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1361 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1362 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1363 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1364 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1365 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1366 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1367 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1368 } Info;
1369
1370 extern const Info instr_info;
1371
1372 }
1373
1374 #endif /* ACO_IR_H */
1375