aco: make PhysReg in units of bytes
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
/* Global ACO debug options; a bitmask of the DEBUG_* flags below. */
extern uint64_t debug_flags;

enum {
   /* run the IR validator (names suggest: after passes / after RA) */
   DEBUG_VALIDATE = 0x1,
   DEBUG_VALIDATE_RA = 0x2,
   /* emit performance warnings */
   DEBUG_PERFWARN = 0x4,
};
52
/**
 * Representation of the instruction's microcode encoding format
 * Note: Some Vector ALU Formats can be combined, such that:
 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
 *
 * (*) The same is applicable for VOP1 and VOPC instructions.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,
   /* Scalar ALU & Control Formats
    * (small sequential values: these formats cannot be combined) */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,
   /* Scalar Memory Format */
   SMEM = 6,
   /* LDS/GDS Format */
   DS = 8,
   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,
   /* Vector Memory Image Format */
   MIMG = 11,
   /* Export Format */
   EXP = 12,
   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats
    * (one bit each so they can be OR'd with DPP/SDWA/VOP3, see above) */
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
   VOP3 = 1 << 11,
   VOP3A = 1 << 11, /* alias of VOP3 */
   VOP3B = 1 << 11, /* alias of VOP3 */
   VOP3P = 1 << 12,
   /* Vector Parameter Interpolation Format */
   VINTRP = 1 << 13,
   DPP = 1 << 14,
   SDWA = 1 << 15,
};
104
/* Bitmask describing the memory classes an instruction interacts with;
 * used to restrict reordering of memory instructions. */
enum barrier_interaction : uint8_t {
   barrier_none = 0,
   barrier_buffer = 0x1,
   barrier_image = 0x2,
   barrier_atomic = 0x4,
   barrier_shared = 0x8,
   /* used for geometry shaders to ensure vertex data writes are before the
    * GS_DONE s_sendmsg. */
   barrier_gs_data = 0x10,
   /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
   barrier_gs_sendmsg = 0x20,
   /* used by barriers. created by s_barrier */
   barrier_barrier = 0x40,
   /* Number of flag bits above. barrier_barrier is bit 6, so there are 7
    * bits; the previous value of 6 did not account for barrier_barrier and
    * would under-size any per-barrier-bit bookkeeping (e.g. arrays indexed
    * by barrier bit). */
   barrier_count = 7,
};
120
/* Rounding modes, encoded as in the hardware MODE register. */
enum fp_round {
   fp_round_ne = 0, /* nearest even */
   fp_round_pi = 1, /* towards +inf */
   fp_round_ni = 2, /* towards -inf */
   fp_round_tz = 3, /* towards zero */
};

enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep = 0x3,
};

/* Floating-point behavior required by a shader/block: the hardware MODE
 * register bits plus flags restricting which FP optimizations are legal. */
struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32:2;
         fp_round round16_64:2;
         unsigned denorm32:2;
         unsigned denorm16_64:2;
      };
      uint8_t val = 0;
   };
   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
   bool preserve_signed_zero_inf_nan32:1;
   bool preserve_signed_zero_inf_nan16_64:1;
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32:1;
   bool must_flush_denorms16_64:1;
   bool care_about_round32:1;
   bool care_about_round16_64:1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead: the MODE bits must match exactly, and every
    * restriction flag set in "other" must also be set here. */
   bool canReplace(float_mode other) const noexcept {
      if (val != other.val)
         return false;
      if (other.preserve_signed_zero_inf_nan32 && !preserve_signed_zero_inf_nan32)
         return false;
      if (other.preserve_signed_zero_inf_nan16_64 && !preserve_signed_zero_inf_nan16_64)
         return false;
      if (other.must_flush_denorms32 && !must_flush_denorms32)
         return false;
      if (other.must_flush_denorms16_64 && !must_flush_denorms16_64)
         return false;
      if (other.care_about_round32 && !care_about_round32)
         return false;
      if (other.care_about_round16_64 && !care_about_round16_64)
         return false;
      return true;
   }
};
167
168 constexpr Format asVOP3(Format format) {
169 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
170 };
171
enum class RegType {
   none = 0,
   sgpr,
   vgpr,
   linear_vgpr,
};

/* Register class of a value: its type (sgpr/vgpr) and size in dwords,
 * packed into one byte. Bits 0-4 hold the size, bit 5 marks vgpr and
 * bit 6 marks a linear (WWM/spill) vgpr class. */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc_)
      : rc(rc_) {}
   constexpr RegClass(RegType type, unsigned size)
      : rc((RC) (size | (type == RegType::vgpr ? 1 << 5 : 0))) {}

   constexpr operator RC() const { return rc; }
   explicit operator bool() = delete;

   /* sgpr classes are exactly those with the vgpr bit clear, i.e. <= s16 */
   constexpr RegType type() const { return rc > RC::s16 ? RegType::vgpr : RegType::sgpr; }
   constexpr unsigned size() const { return unsigned(rc) & 0x1F; }
   /* sgprs are always "linear"; vgprs only when the linear bit is set */
   constexpr bool is_linear() const { return rc <= RC::s16 || (rc & (1 << 6)); }
   constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }

private:
   RC rc;
};
219
220 /* transitional helper expressions */
221 static constexpr RegClass s1{RegClass::s1};
222 static constexpr RegClass s2{RegClass::s2};
223 static constexpr RegClass s3{RegClass::s3};
224 static constexpr RegClass s4{RegClass::s4};
225 static constexpr RegClass s8{RegClass::s8};
226 static constexpr RegClass s16{RegClass::s16};
227 static constexpr RegClass v1{RegClass::v1};
228 static constexpr RegClass v2{RegClass::v2};
229 static constexpr RegClass v3{RegClass::v3};
230 static constexpr RegClass v4{RegClass::v4};
231 static constexpr RegClass v5{RegClass::v5};
232 static constexpr RegClass v6{RegClass::v6};
233 static constexpr RegClass v7{RegClass::v7};
234 static constexpr RegClass v8{RegClass::v8};
235
/**
 * Temp Class
 * Each temporary virtual register has a
 * register class (i.e. size and type)
 * and SSA id.
 */
struct Temp {
   Temp() = default;
   constexpr Temp(uint32_t id, RegClass cls) noexcept
      : id_(id), reg_class(cls) {}

   /* SSA id; id 0 is used for "no temporary" (see Operand/Definition) */
   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return reg_class; }

   /* size in dwords (forwarded from the register class) */
   constexpr unsigned size() const noexcept { return reg_class.size(); }
   constexpr RegType type() const noexcept { return reg_class.type(); }
   constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }

   /* ordering/equality compare the SSA id only, not the register class */
   constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   uint32_t id_:24; /* 24-bit id packs Temp into 4 bytes with reg_class */
   RegClass reg_class;
};
262
/**
 * PhysReg
 * Represents the physical register for each
 * Operand and Definition.
 * Stored in units of bytes (reg_b): the dword register index is reg_b / 4
 * and the sub-dword byte offset within that register is reg_b % 4.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r * 4) {}
   constexpr unsigned reg() const { return reg_b / 4; }
   constexpr unsigned byte() const { return reg_b % 4; }
   /* implicit conversion yields the dword register index, not the byte address */
   constexpr operator unsigned() const { return reg(); }

   uint16_t reg_b = 0;
};
277
/* helper expressions for special registers
 * (dword indices in the scalar operand encoding space) */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126}; /* alias of exec (low half) */
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg vccz{251};
static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};
289
/**
 * Operand Class
 * Initially, each Operand refers to either
 * a temporary virtual register
 * or to a constant value
 * Temporary registers get mapped to physical register during RA
 * Constant values are inlined into the instruction sequence.
 *
 * Constants are "fixed" to the scalar-operand encoding slot that represents
 * them: 128..192 encode the integers 0..64, 193..208 encode -1..-16,
 * 240..247 encode +-0.5/1.0/2.0/4.0 and 255 marks a literal constant.
 */
class Operand final
{
public:
   /* default: an undefined operand, fixed to slot 128 (the inline-0 slot,
    * used here as a placeholder) */
   constexpr Operand()
      : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
        isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
        isLateKill_(false) {}

   /* wrap a temporary; an id of 0 creates an undefined operand instead */
   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* 32-bit constant; fixed to its inline-constant slot when one exists,
    * otherwise to the literal slot (255) */
   explicit Operand(uint32_t v, bool is64bit = false) noexcept
   {
      data_.i = v;
      isConstant_ = true;
      is64BitConst_ = is64bit;
      if (v <= 64)
         setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         setFixed(PhysReg{192 - v}); /* unsigned wrap-around yields 193..208 */
      else if (v == 0x3f000000) /* 0.5 */
         setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         setFixed(PhysReg{255});
      }
   };
   /* 64-bit constant; only values with a hardware inline encoding are
    * representable (64-bit literals are not supported). data_.i keeps the
    * 32-bit pattern that re-expands to the value in constantValue64(). */
   explicit Operand(uint64_t v) noexcept
   {
      isConstant_ = true;
      is64BitConst_ = true;
      if (v <= 64) {
         data_.i = (uint32_t) v;
         setFixed(PhysReg{128 + (uint32_t) v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         data_.i = (uint32_t) v;
         setFixed(PhysReg{192 - (uint32_t) v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         data_.i = 0x3f000000;
         setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         data_.i = 0xbf000000;
         setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         data_.i = 0x3f800000;
         setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         data_.i = 0xbf800000;
         setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         data_.i = 0x40000000;
         setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         data_.i = 0xc0000000;
         setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         data_.i = 0x40800000;
         setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         data_.i = 0xc0800000;
         setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double.*/
         isConstant_ = 0;
         assert(false && "attempt to create a 64-bit literal constant");
      }
   };
   /* undefined operand of the given register class */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* operand fixed to a specific physical register (no SSA temporary) */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   constexpr bool isTemp() const noexcept
   {
      return isTemp_;
   }

   constexpr void setTemp(Temp t) noexcept {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept
   {
      return data_.temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return data_.temp.id();
   }

   /* constants have no register class; temps and undefs do */
   constexpr bool hasRegClass() const noexcept
   {
      return isTemp() || isUndefined();
   }

   constexpr RegClass regClass() const noexcept
   {
      return data_.temp.regClass();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return is64BitConst_ ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   /* an all-ones register index means "not fixed" */
   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept
   {
      return isConstant_;
   }

   /* a constant without an inline encoding (slot 255) */
   constexpr bool isLiteral() const noexcept
   {
      return isConstant() && reg_ == 255;
   }

   constexpr bool isUndefined() const noexcept
   {
      return isUndef_;
   }

   constexpr uint32_t constantValue() const noexcept
   {
      return data_.i;
   }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   /* Reconstructs the 64-bit value of a 64-bit inline constant from its
    * operand slot; for 32-bit constants, optionally sign-extends data_.i. */
   constexpr uint64_t constantValue64(bool signext=false) const noexcept
   {
      if (is64BitConst_) {
         if (reg_ <= 192)
            return reg_ - 128; /* slots 128..192 encode 0..64 */
         else if (reg_ <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193); /* slots 193..208 encode -1..-16 */

         switch (reg_) {
         case 240:
            return 0x3FE0000000000000; /* 0.5 */
         case 241:
            return 0xBFE0000000000000; /* -0.5 */
         case 242:
            return 0x3FF0000000000000; /* 1.0 */
         case 243:
            return 0xBFF0000000000000; /* -1.0 */
         case 244:
            return 0x4000000000000000; /* 2.0 */
         case 245:
            return 0xC000000000000000; /* -2.0 */
         case 246:
            return 0x4010000000000000; /* 4.0 */
         case 247:
            return 0xC010000000000000; /* -4.0 */
         }
      }
      return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
   }

   /* Indicates that the killed operand's live range intersects with the
    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
    * not set by liveness analysis. */
   constexpr void setLateKill(bool flag) noexcept
   {
      isLateKill_ = flag;
   }

   constexpr bool isLateKill() const noexcept
   {
      return isLateKill_;
   }

   /* clearing the kill flag also clears first-kill */
   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_ || isFirstKill();
   }

   /* setting first-kill implies kill */
   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() is only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept
   {
      return isFirstKill_;
   }

   constexpr bool isKillBeforeDef() const noexcept
   {
      return isKill() && !isLateKill();
   }

   constexpr bool isFirstKillBeforeDef() const noexcept
   {
      return isFirstKill() && !isLateKill();
   }

   /* structural equality: same kind of operand with the same value/temp,
    * same fixed register (if any) and same kill-before-def behavior */
   constexpr bool operator == (Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }
private:
   union {
      uint32_t i;
      float f;
      Temp temp = Temp(0, s1);
   } data_;
   PhysReg reg_;
   union {
      struct {
         uint8_t isTemp_:1;
         uint8_t isFixed_:1;
         uint8_t isConstant_:1;
         uint8_t isKill_:1;
         uint8_t isUndef_:1;
         uint8_t isFirstKill_:1;
         uint8_t is64BitConst_:1;
         uint8_t isLateKill_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
595
/**
 * Definition Class
 * Definitions are the results of Instructions
 * and refer to temporary virtual registers
 * which are later mapped to physical registers
 */
class Definition final
{
public:
   constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
   /* definition of a new temporary with the given SSA id */
   Definition(uint32_t index, RegClass type) noexcept
      : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept
      : temp(tmp) {}
   /* definition fixed to a physical register, with no SSA id (id 0) */
   Definition(PhysReg reg, RegClass type) noexcept
      : temp(Temp(0, type))
   {
      setFixed(reg);
   }
   /* definition of temporary tmpId, already fixed to a physical register */
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
      : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   /* id 0 means "no temporary" (fixed-register-only definition) */
   constexpr bool isTemp() const noexcept
   {
      return tempId() > 0;
   }

   constexpr Temp getTemp() const noexcept
   {
      return temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return temp.id();
   }

   constexpr void setTemp(Temp t) noexcept {
      temp = t;
   }

   constexpr RegClass regClass() const noexcept
   {
      return temp.regClass();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      return temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   /* a non-binding register preference; stored in the same reg_ field,
    * so a later setFixed() overwrites it */
   constexpr void setHint(PhysReg reg) noexcept
   {
      hasHint_ = 1;
      reg_ = reg;
   }

   constexpr bool hasHint() const noexcept
   {
      return hasHint_;
   }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_;
   }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_:1;
         uint8_t hasHint_:1;
         uint8_t isKill_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
700
701 class Block;
702
703 struct Instruction {
704 aco_opcode opcode;
705 Format format;
706 uint32_t pass_flags;
707
708 aco::span<Operand> operands;
709 aco::span<Definition> definitions;
710
711 constexpr bool isVALU() const noexcept
712 {
713 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
714 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
715 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
716 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
717 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
718 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
719 }
720
721 constexpr bool isSALU() const noexcept
722 {
723 return format == Format::SOP1 ||
724 format == Format::SOP2 ||
725 format == Format::SOPC ||
726 format == Format::SOPK ||
727 format == Format::SOPP;
728 }
729
730 constexpr bool isVMEM() const noexcept
731 {
732 return format == Format::MTBUF ||
733 format == Format::MUBUF ||
734 format == Format::MIMG;
735 }
736
737 constexpr bool isDPP() const noexcept
738 {
739 return (uint16_t) format & (uint16_t) Format::DPP;
740 }
741
742 constexpr bool isVOP3() const noexcept
743 {
744 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
745 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
746 format == Format::VOP3P;
747 }
748
749 constexpr bool isSDWA() const noexcept
750 {
751 return (uint16_t) format & (uint16_t) Format::SDWA;
752 }
753
754 constexpr bool isFlatOrGlobal() const noexcept
755 {
756 return format == Format::FLAT || format == Format::GLOBAL;
757 }
758
759 constexpr bool usesModifiers() const noexcept;
760
761 constexpr bool reads_exec() const noexcept
762 {
763 for (const Operand& op : operands) {
764 if (op.isFixed() && op.physReg() == exec)
765 return true;
766 }
767 return false;
768 }
769 };
770
/* SOPK: scalar op with a 16-bit inline immediate */
struct SOPK_instruction : public Instruction {
   uint16_t imm;
};

/* SOPP: scalar program control (branches, waitcnt, sendmsg, ...) */
struct SOPP_instruction : public Instruction {
   uint32_t imm;
   /* branch target block index; NOTE(review): meaning for non-branch SOPP
    * not visible here — confirm at the use sites */
   int block;
};

struct SOPC_instruction : public Instruction {
};

struct SOP1_instruction : public Instruction {
};

struct SOP2_instruction : public Instruction {
};
788
/**
 * Scalar Memory Format:
 * For s_(buffer_)load_dword*:
 * Operand(0): SBASE - SGPR-pair which provides base address
 * Operand(1): Offset - immediate (un)signed offset or SGPR
 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 * Operand(n-1): SOffset - SGPR offset (Vega only)
 *
 * Having no operands is also valid for instructions such as s_dcache_inv.
 *
 */
struct SMEM_instruction : public Instruction {
   bool glc : 1; /* VI+: globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool nv : 1; /* VEGA only: Non-volatile */
   bool can_reorder : 1; /* presumably: may be reordered against other memory ops — confirm in scheduler */
   bool disable_wqm : 1; /* require an exec mask without helper invocations */
   barrier_interaction barrier; /* memory classes this access interacts with */
};
808
/* plain VALU encodings carry no extra fields */
struct VOP1_instruction : public Instruction {
};

struct VOP2_instruction : public Instruction {
};

struct VOPC_instruction : public Instruction {
};
817
/* VOP3A encoding: input modifiers per source, output modifiers on the result */
struct VOP3A_instruction : public Instruction {
   bool abs[3]; /* per-source absolute-value modifier */
   bool neg[3]; /* per-source negate modifier */
   uint8_t opsel : 4; /* operand half-selection bits */
   uint8_t omod : 2; /* output modifier */
   bool clamp : 1;
};
825
/**
 * Data Parallel Primitives Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 * The swizzle applies to the src0 operand.
 *
 */
struct DPP_instruction : public Instruction {
   bool abs[2]; /* per-source absolute-value modifier */
   bool neg[2]; /* per-source negate modifier */
   uint16_t dpp_ctrl; /* swizzle/shift control */
   uint8_t row_mask : 4;
   uint8_t bank_mask : 4;
   bool bound_ctrl : 1;
};
840
/* VINTRP: parameter interpolation; selects the attribute and its component */
struct Interp_instruction : public Instruction {
   uint8_t attribute;
   uint8_t component;
};
845
/**
 * Local and Global Data Sharing instructions
 * Operand(0): ADDR - VGPR which supplies the address.
 * Operand(1): DATA0 - First data VGPR.
 * Operand(2): DATA1 - Second data VGPR.
 * Operand(n-1): M0 - LDS size.
 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 *
 */
struct DS_instruction : public Instruction {
   int16_t offset0;
   int8_t offset1;
   bool gds; /* true: GDS access, false: LDS */
};
860
/**
 * Vector Memory Untyped-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MUBUF_instruction : public Instruction {
   uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool lds : 1; /* Return read-data to LDS instead of VGPRs */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: may be reordered against other memory ops — confirm in scheduler */
   barrier_interaction barrier; /* memory classes this access interacts with */
};
883
/**
 * Vector Memory Typed-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MTBUF_instruction : public Instruction {
   uint16_t offset; /* Unsigned byte offset - 12 bit */
   uint8_t dfmt : 4; /* Data Format of data in memory buffer */
   uint8_t nfmt : 3; /* Numeric format of data in memory */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: may be reordered against other memory ops — confirm in scheduler */
   barrier_interaction barrier; /* memory classes this access interacts with */
};
906
/**
 * Vector Memory Image Instructions
 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
 *             or VDATA - Vector GPR for write data.
 * Operand(2): VADDR - Address source. Can carry an offset or an index.
 * Definition(0): VDATA - Vector GPR for read result.
 *
 */
struct MIMG_instruction : public Instruction {
   uint8_t dmask; /* Data VGPR enable mask */
   uint8_t dim : 3; /* NAVI: dimensionality */
   bool unrm : 1; /* Force address to be un-normalized */
   bool dlc : 1; /* NAVI: device level coherent */
   bool glc : 1; /* globally coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool da : 1; /* declare an array */
   bool lwe : 1; /* LOD warning enable (was mis-commented as "un-normalized") */
   bool r128 : 1; /* NAVI: Texture resource size */
   bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1; /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: may be reordered against other memory ops — confirm in scheduler */
   barrier_interaction barrier; /* memory classes this access interacts with */
};
933
/**
 * Flat/Scratch/Global Instructions
 * Operand(0): ADDR
 * Operand(1): SADDR
 * Operand(2) / Definition(0): DATA/VDST
 *
 */
struct FLAT_instruction : public Instruction {
   uint16_t offset; /* Vega/Navi only */
   bool slc : 1; /* system level coherent */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool lds : 1;
   bool nv : 1;
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: may be reordered against other memory ops — confirm in scheduler */
   barrier_interaction barrier; /* memory classes this access interacts with */
};
952
/* EXP: export to a render target, position or parameter */
struct Export_instruction : public Instruction {
   uint8_t enabled_mask; /* which components are written */
   uint8_t dest; /* export target */
   bool compressed : 1;
   bool done : 1;
   bool valid_mask : 1;
};
960
struct Pseudo_instruction : public Instruction {
   bool tmp_in_scc;
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
};
965
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];
};
973
/* pseudo barrier: carries no extra fields beyond the opcode */
struct Pseudo_barrier_instruction : public Instruction {
};
976
/* Reduction operations; each comes in a 32-bit and a 64-bit variant. */
enum ReduceOp {
   iadd32, iadd64,
   imul32, imul64,
   fadd32, fadd64,
   fmul32, fmul64,
   imin32, imin64,
   imax32, imax64,
   umin32, umin64,
   umax32, umax64,
   fmin32, fmin64,
   fmax32, fmax64,
   iand32, iand64,
   ior32, ior64,
   ixor32, ixor64,
   gfx10_wave64_bpermute
};
993
/**
 * Subgroup Reduction Instructions, everything except for the data to be
 * reduced and the result as inserted by setup_reduce_temp().
 * Operand(0): data to be reduced
 * Operand(1): reduce temporary
 * Operand(2): vector temporary
 * Definition(0): result
 * Definition(1): scalar temporary
 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 * Definition(3): scc clobber
 * Definition(4): vcc clobber
 *
 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   unsigned cluster_size; // must be 0 for scans
};
1011
/* Deleter for instructions: they are allocated with calloc (see
 * create_instruction()), so they must be released with free(), not delete. */
struct instr_deleter_functor {
   void operator()(void* ptr) {
      free(ptr);
   }
};

/* owning pointer to a (calloc'd) instruction */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1020
/* Allocates an instruction of derived type T together with its operand and
 * definition arrays in a single zeroed block: [T][Operand...][Definition...].
 * The whole block is released at once via instr_deleter_functor (free()). */
template<typename T>
T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
{
   std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   char *data = (char*) calloc(1, size);
   T* inst = (T*) data;

   inst->opcode = opcode;
   inst->format = format;

   /* aco::span apparently stores a 16-bit offset relative to its own address
    * rather than a pointer (see aco_util.h); these offsets point the spans
    * at the arrays placed directly behind T */
   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
   inst->operands = aco::span<Operand>(operands_offset, num_operands);
   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);

   return inst;
}
1038
/* Returns true if the instruction uses any input/output modifiers
 * (DPP/SDWA encodings, or VOP3 abs/neg/opsel/clamp/omod). */
constexpr bool Instruction::usesModifiers() const noexcept
{
   if (isDPP() || isSDWA())
      return true;
   if (!isVOP3())
      return false;
   const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
   /* NOTE(review): indexes abs[3]/neg[3] with the operand count — assumes
    * VOP3 instructions never have more than 3 operands; confirm */
   for (unsigned i = 0; i < operands.size(); i++) {
      if (vop3->abs[i] || vop3->neg[i])
         return true;
   }
   return vop3->opsel || vop3->clamp || vop3->omod;
}
1052
1053 constexpr bool is_phi(Instruction* instr)
1054 {
1055 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1056 }
1057
1058 static inline bool is_phi(aco_ptr<Instruction>& instr)
1059 {
1060 return is_phi(instr.get());
1061 }
1062
/* Returns the barrier bits (see barrier_interaction) the given instruction
 * interacts with; defined elsewhere. */
barrier_interaction get_barrier_interaction(Instruction* instr);

/* Presumably: true if the instruction can be removed given the use counts
 * in "uses" — NOTE(review): confirm at the definition site. */
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
1066
/* Bitmask of properties attached to a CFG block (Block::kind). */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,
   block_kind_merge = 1 << 10,
   block_kind_invert = 1 << 11,
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
   block_kind_uses_demote = 1 << 14,
   block_kind_export_end = 1 << 15,
};
1087
1088
1089 struct RegisterDemand {
1090 constexpr RegisterDemand() = default;
1091 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1092 : vgpr{v}, sgpr{s} {}
1093 int16_t vgpr = 0;
1094 int16_t sgpr = 0;
1095
1096 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1097 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1098 }
1099
1100 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1101 return vgpr > other.vgpr || sgpr > other.sgpr;
1102 }
1103
1104 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1105 if (t.type() == RegType::sgpr)
1106 return RegisterDemand( vgpr, sgpr + t.size() );
1107 else
1108 return RegisterDemand( vgpr + t.size(), sgpr );
1109 }
1110
1111 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1112 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1113 }
1114
1115 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1116 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1117 }
1118
1119 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1120 vgpr += other.vgpr;
1121 sgpr += other.sgpr;
1122 return *this;
1123 }
1124
1125 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1126 vgpr -= other.vgpr;
1127 sgpr -= other.sgpr;
1128 return *this;
1129 }
1130
1131 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1132 if (t.type() == RegType::sgpr)
1133 sgpr += t.size();
1134 else
1135 vgpr += t.size();
1136 return *this;
1137 }
1138
1139 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1140 if (t.type() == RegType::sgpr)
1141 sgpr -= t.size();
1142 else
1143 vgpr -= t.size();
1144 return *this;
1145 }
1146
1147 constexpr void update(const RegisterDemand other) noexcept {
1148 vgpr = std::max(vgpr, other.vgpr);
1149 sgpr = std::max(sgpr, other.sgpr);
1150 }
1151
1152 };
1153
1154 /* CFG */
/* CFG: one basic block of the program */
struct Block {
   float_mode fp_mode;                   /* float_mode for this block's instructions */
   unsigned index;                       /* position of this block in Program::blocks */
   unsigned offset = 0;
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds;  /* predecessor block indices in the logical CFG */
   std::vector<unsigned> linear_preds;   /* predecessor block indices in the linear CFG */
   std::vector<unsigned> logical_succs;  /* successor block indices in the logical CFG */
   std::vector<unsigned> linear_succs;   /* successor block indices in the linear CFG */
   RegisterDemand register_demand = RegisterDemand();
   uint16_t loop_nest_depth = 0;         /* number of loops this block is nested inside */
   uint16_t kind = 0;                    /* bitmask of block_kind flags */
   int logical_idom = -1;                /* immediate dominator in the logical CFG, -1 if unset */
   int linear_idom = -1;                 /* immediate dominator in the linear CFG, -1 if unset */
   Temp live_out_exec = Temp();

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;
   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */

   Block(unsigned idx) : index(idx) {}
   Block() : index(0) {}
};
1179
/* A Stage is a bitmask combining one or more software (API) stages with
 * exactly one hardware stage; see the Program::stage settings below. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_gs_copy = 1 << 6;
static constexpr Stage sw_mask = 0x7f; /* covers the 7 software-stage bits */

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 7;
static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_gs = 1 << 9;
static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_hs = 1 << 11;
static constexpr Stage hw_fs = 1 << 12;
static constexpr Stage hw_cs = 1 << 13;
static constexpr Stage hw_mask = 0x7f << 7; /* covers the 7 hardware-stage bits */

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1223
/* Indices into Program::statistics; only gathered when
 * Program::collect_statistics is set. */
enum statistic {
   statistic_hash,
   statistic_instructions,
   statistic_copies,
   statistic_branches,
   statistic_cycles,
   statistic_vmem_clauses,
   statistic_smem_clauses,
   statistic_vmem_score,
   statistic_smem_score,
   statistic_sgpr_presched,  /* SGPR demand before scheduling */
   statistic_vgpr_presched,  /* VGPR demand before scheduling */
   num_statistics
};
1238
1239 class Program final {
1240 public:
1241 float_mode next_fp_mode;
1242 std::vector<Block> blocks;
1243 RegisterDemand max_reg_demand = RegisterDemand();
1244 uint16_t num_waves = 0;
1245 uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1246 ac_shader_config* config;
1247 struct radv_shader_info *info;
1248 enum chip_class chip_class;
1249 enum radeon_family family;
1250 unsigned wave_size;
1251 RegClass lane_mask;
1252 Stage stage; /* Stage */
1253 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1254 bool needs_wqm = false; /* there exists a p_wqm instruction */
1255 bool wb_smem_l1_on_end = false;
1256
1257 std::vector<uint8_t> constant_data;
1258 Temp private_segment_buffer;
1259 Temp scratch_offset;
1260
1261 uint16_t min_waves = 0;
1262 uint16_t lds_alloc_granule;
1263 uint32_t lds_limit; /* in bytes */
1264 bool has_16bank_lds;
1265 uint16_t vgpr_limit;
1266 uint16_t sgpr_limit;
1267 uint16_t physical_sgprs;
1268 uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1269 uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1270 unsigned workgroup_size; /* if known; otherwise UINT_MAX */
1271
1272 bool xnack_enabled = false;
1273
1274 bool needs_vcc = false;
1275 bool needs_flat_scr = false;
1276
1277 bool collect_statistics = false;
1278 uint32_t statistics[num_statistics];
1279
1280 uint32_t allocateId()
1281 {
1282 assert(allocationID <= 16777215);
1283 return allocationID++;
1284 }
1285
1286 uint32_t peekAllocationId()
1287 {
1288 return allocationID;
1289 }
1290
1291 void setAllocationId(uint32_t id)
1292 {
1293 allocationID = id;
1294 }
1295
1296 Block* create_and_insert_block() {
1297 blocks.emplace_back(blocks.size());
1298 blocks.back().fp_mode = next_fp_mode;
1299 return &blocks.back();
1300 }
1301
1302 Block* insert_block(Block&& block) {
1303 block.index = blocks.size();
1304 block.fp_mode = next_fp_mode;
1305 blocks.emplace_back(std::move(block));
1306 return &blocks.back();
1307 }
1308
1309 private:
1310 uint32_t allocationID = 1;
1311 };
1312
/* Result of liveness analysis (produced by live_var_analysis()). */
struct live {
   /* live temps out per block */
   std::vector<std::set<Temp>> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
1319
/* instruction selection: build the ACO IR from NIR shaders */
void select_program(Program *program,
                    unsigned shader_count,
                    struct nir_shader *const *shaders,
                    ac_shader_config* config,
                    struct radv_shader_args *args);
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
                           ac_shader_config* config,
                           struct radv_shader_args *args);

/* compiler passes operating on the IR */
void lower_wqm(Program* program, live& live_vars,
               const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);
void value_numbering(Program* program);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
/* assembly and output */
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary,
               unsigned exec_size, std::ostream& out);
/* IR consistency checks (see DEBUG_VALIDATE / DEBUG_VALIDATE_RA flags) */
void validate(Program* program, FILE *output);
bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
#ifndef NDEBUG
void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#else
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif

/* shader statistics collection (see Program::collect_statistics) */
void collect_presched_stats(Program *program);
void collect_preasm_stats(Program *program);
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);

/* IR printing helpers */
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);

/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);

/* get number of sgprs/vgprs that must be allocated to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1382
/* Static per-opcode information, indexed by aco_opcode. */
typedef struct {
   /* hardware encoding of each opcode per gfx generation */
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char *name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;

/* the single global instance of the opcode info tables */
extern const Info instr_info;
1395
1396 }
1397
1398 #endif /* ACO_IR_H */
1399