aco: add comparison operators for PhysReg
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
/* bitmask of ACO_DEBUG options, parsed at startup (see aco_interface.cpp) —
 * TODO(review): confirm where it is initialized; only the declaration is here */
extern uint64_t debug_flags;

/* bit values for debug_flags */
enum {
   DEBUG_VALIDATE = 0x1,    /* run IR validation between passes */
   DEBUG_VALIDATE_RA = 0x2, /* validate register assignment */
   DEBUG_PERFWARN = 0x4,    /* print performance warnings */
};
52
53 /**
54 * Representation of the instruction's microcode encoding format
55 * Note: Some Vector ALU Formats can be combined, such that:
56 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
57 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
58 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
59 *
60 * (*) The same is applicable for VOP1 and VOPC instructions.
61 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,
   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,
   /* Scalar Memory Format */
   SMEM = 6,
   /* LDS/GDS Format */
   DS = 8,
   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,
   /* Vector Memory Image Format */
   MIMG = 11,
   /* Export Format */
   EXP = 12,
   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats.
    * These use distinct high bits so they can be OR'd together (see the
    * comment above): the low byte holds the base format, the high bits the
    * VALU/DPP/SDWA flavor. */
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
   VOP3 = 1 << 11,
   VOP3A = 1 << 11, /* alias of VOP3: A/B variants share one encoding bit */
   VOP3B = 1 << 11, /* alias of VOP3 */
   VOP3P = 1 << 12,
   /* Vector Parameter Interpolation Format */
   VINTRP = 1 << 13,
   DPP = 1 << 14,
   SDWA = 1 << 15,
};
104
/* Classifies how a memory instruction interacts with barriers; used by the
 * scheduler and insert_waitcnt to order instructions. Values are single bits
 * so they can be combined into a mask. */
enum barrier_interaction : uint8_t {
   barrier_none = 0,
   barrier_buffer = 0x1,
   barrier_image = 0x2,
   barrier_atomic = 0x4,
   barrier_shared = 0x8,
   /* used for geometry shaders to ensure vertex data writes are before the
    * GS_DONE s_sendmsg. */
   barrier_gs_data = 0x10,
   /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
   barrier_gs_sendmsg = 0x20,
   /* used by barriers. created by s_barrier */
   barrier_barrier = 0x40,
   /* number of distinct barrier bits above: 0x1..0x40 occupy bit positions
    * 0..6. The previous value of 6 missed barrier_barrier, so any array
    * sized by barrier_count and indexed by the bit position (ffs) of a
    * barrier flag was one element too small. */
   barrier_count = 7,
};
120
/* rounding modes, encoded as in the hardware MODE register */
enum fp_round {
   fp_round_ne = 0, /* nearest even */
   fp_round_pi = 1, /* towards +infinity */
   fp_round_ni = 2, /* towards -infinity */
   fp_round_tz = 3, /* towards zero */
};

/* denormal handling, encoded as in the hardware MODE register */
enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep = 0x3,
};

struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32:2;
         fp_round round16_64:2;
         unsigned denorm32:2;
         unsigned denorm16_64:2;
      };
      uint8_t val = 0;
   };
   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
   bool preserve_signed_zero_inf_nan32:1;
   bool preserve_signed_zero_inf_nan16_64:1;
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32:1;
   bool must_flush_denorms16_64:1;
   bool care_about_round32:1;
   bool care_about_round16_64:1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead: the raw MODE bits must match exactly and every
    * preservation/flush/rounding requirement of "other" must also be a
    * requirement of this mode. */
   bool canReplace(float_mode other) const noexcept {
      if (val != other.val)
         return false;
      if (other.preserve_signed_zero_inf_nan32 && !preserve_signed_zero_inf_nan32)
         return false;
      if (other.preserve_signed_zero_inf_nan16_64 && !preserve_signed_zero_inf_nan16_64)
         return false;
      if (other.must_flush_denorms32 && !must_flush_denorms32)
         return false;
      if (other.must_flush_denorms16_64 && !must_flush_denorms16_64)
         return false;
      if (other.care_about_round32 && !care_about_round32)
         return false;
      if (other.care_about_round16_64 && !care_about_round16_64)
         return false;
      return true;
   }
};
167
168 constexpr Format asVOP3(Format format) {
169 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
170 };
171
enum class RegType {
   none = 0,
   sgpr,
   vgpr,
   linear_vgpr,
};

/* Register class: the type and size (in dwords) of a value.
 * Encoding: bits 0..4 hold the size, bit 5 marks VGPRs and bit 6 marks
 * linear (whole-wave) VGPRs. */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = 1 | (1 << 5),
      v2 = 2 | (1 << 5),
      v3 = 3 | (1 << 5),
      v4 = 4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc)
      : rc(rc) {}
   constexpr RegClass(RegType type, unsigned size)
      : rc(static_cast<RC>((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}

   constexpr operator RC() const { return rc; }
   explicit operator bool() = delete;

   constexpr RegType type() const { return rc > RC::s16 ? RegType::vgpr : RegType::sgpr; }
   constexpr unsigned size() const { return static_cast<unsigned>(rc) & 0x1Fu; }
   /* SGPRs are always linear; VGPRs only when the linear bit is set */
   constexpr bool is_linear() const { return type() == RegType::sgpr || (rc & (1 << 6)); }
   constexpr RegClass as_linear() const { return RegClass(static_cast<RC>(rc | (1 << 6))); }

private:
   RC rc;
};
219
220 /* transitional helper expressions */
/* transitional helper expressions: shorthands so code can write `s2` instead
 * of RegClass::s2 (note: RegClass::s6 has no shorthand here) */
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
235
236 /**
237 * Temp Class
238 * Each temporary virtual register has a
239 * register class (i.e. size and type)
240 * and SSA id.
241 */
242 struct Temp {
243 Temp() = default;
244 constexpr Temp(uint32_t id, RegClass cls) noexcept
245 : id_(id), reg_class(cls) {}
246
247 constexpr uint32_t id() const noexcept { return id_; }
248 constexpr RegClass regClass() const noexcept { return reg_class; }
249
250 constexpr unsigned size() const noexcept { return reg_class.size(); }
251 constexpr RegType type() const noexcept { return reg_class.type(); }
252 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
253
254 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
255 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
256 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
257
258 private:
259 uint32_t id_:24;
260 RegClass reg_class;
261 };
262
263 /**
264 * PhysReg
265 * Represents the physical register for each
266 * Operand and Definition.
267 */
/* A physical register location, stored in units of bytes so that sub-dword
 * positions can be represented: reg_b = register index * 4 + byte offset. */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r * 4u) {}
   /* index of the 32-bit register */
   constexpr unsigned reg() const { return reg_b / 4u; }
   /* byte offset within that register */
   constexpr unsigned byte() const { return reg_b % 4u; }
   constexpr operator unsigned() const { return reg(); }
   /* compare the full byte address, so registers differing only in byte()
    * compare unequal */
   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
   constexpr bool operator!=(PhysReg other) const { return !(*this == other); }
   constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }

   uint16_t reg_b = 0;
};
280
281 /* helper expressions for special registers */
/* helper expressions for special registers — values follow the AMD scalar
 * operand numbering (NOTE(review): confirm per-generation validity against
 * the relevant ISA manual; sgpr_null is already marked GFX10+) */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};   /* alias of exec (low half) */
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg vccz{251};
static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};
292
293 /**
294 * Operand Class
295 * Initially, each Operand refers to either
296 * a temporary virtual register
297 * or to a constant value
298 * Temporary registers get mapped to physical register during RA
299 * Constant values are inlined into the instruction sequence.
300 */
class Operand final
{
public:
   /* Default: an undefined operand, parked at operand encoding 128
    * (the inline constant 0). */
   constexpr Operand()
      : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
        isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
        isLateKill_(false) {}

   /* Operand referring to a temporary; id 0 creates an undefined operand. */
   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* 32-bit constant operand. Values with a hardware inline-constant
    * encoding are fixed to it: 128+v for 0..64, 193..208 for -1..-16,
    * 240..247 for +-0.5/1.0/2.0/4.0. Anything else becomes a literal
    * (encoding 255) emitted as an extra dword, which is not possible
    * for 64-bit constants. */
   explicit Operand(uint32_t v, bool is64bit = false) noexcept
   {
      data_.i = v;
      isConstant_ = true;
      is64BitConst_ = is64bit;
      if (v <= 64)
         setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         setFixed(PhysReg{192 - v}); /* unsigned wrap-around yields 193..208 */
      else if (v == 0x3f000000) /* 0.5 */
         setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         setFixed(PhysReg{255});
      }
   };
   /* 64-bit constant operand. Only values with an inline encoding are
    * representable; data_.i keeps the equivalent 32-bit pattern so that
    * constantValue() stays meaningful. The double bit patterns below map
    * to the same inline-constant slots 240..247 as their float versions. */
   explicit Operand(uint64_t v) noexcept
   {
      isConstant_ = true;
      is64BitConst_ = true;
      if (v <= 64) {
         data_.i = (uint32_t) v;
         setFixed(PhysReg{128 + (uint32_t) v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         data_.i = (uint32_t) v;
         setFixed(PhysReg{192 - (uint32_t) v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         data_.i = 0x3f000000;
         setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         data_.i = 0xbf000000;
         setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         data_.i = 0x3f800000;
         setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         data_.i = 0xbf800000;
         setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         data_.i = 0x40000000;
         setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         data_.i = 0xc0000000;
         setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         data_.i = 0x40800000;
         setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         data_.i = 0xc0800000;
         setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double.*/
         isConstant_ = 0;
         assert(false && "attempt to create a 64-bit literal constant");
      }
   };
   /* Undefined operand of the given register class. */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* Operand fixed to a physical register (e.g. exec, vcc, m0). */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   constexpr bool isTemp() const noexcept
   {
      return isTemp_;
   }

   constexpr void setTemp(Temp t) noexcept {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   /* only meaningful when isTemp() or isUndefined() */
   constexpr Temp getTemp() const noexcept
   {
      return data_.temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return data_.temp.id();
   }

   constexpr bool hasRegClass() const noexcept
   {
      return isTemp() || isUndefined();
   }

   /* only meaningful when hasRegClass() */
   constexpr RegClass regClass() const noexcept
   {
      return data_.temp.regClass();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return is64BitConst_ ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   /* NOTE(review): PhysReg converts to unsigned via reg() (16-bit range),
    * so `reg != unsigned(-1)` can never be false and isFixed_ is always
    * set to true here — confirm whether the sentinel is still intended */
   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept
   {
      return isConstant_;
   }

   /* a constant that did not fit an inline encoding (encoding 255) */
   constexpr bool isLiteral() const noexcept
   {
      return isConstant() && reg_ == 255;
   }

   constexpr bool isUndefined() const noexcept
   {
      return isUndef_;
   }

   /* low 32 bits of the constant (see the constructors) */
   constexpr uint32_t constantValue() const noexcept
   {
      return data_.i;
   }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   /* Reconstructs the full 64-bit value of a 64-bit inline constant from its
    * operand encoding; for 32-bit constants, optionally sign-extends. */
   constexpr uint64_t constantValue64(bool signext=false) const noexcept
   {
      if (is64BitConst_) {
         if (reg_ <= 192)
            return reg_ - 128; /* inline 0..64 */
         else if (reg_ <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193); /* inline -1..-16 */

         switch (reg_) {
         case 240:
            return 0x3FE0000000000000;
         case 241:
            return 0xBFE0000000000000;
         case 242:
            return 0x3FF0000000000000;
         case 243:
            return 0xBFF0000000000000;
         case 244:
            return 0x4000000000000000;
         case 245:
            return 0xC000000000000000;
         case 246:
            return 0x4010000000000000;
         case 247:
            return 0xC010000000000000;
         }
      }
      return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
   }

   /* Indicates that the killed operand's live range intersects with the
    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
    * not set by liveness analysis. */
   constexpr void setLateKill(bool flag) noexcept
   {
      isLateKill_ = flag;
   }

   constexpr bool isLateKill() const noexcept
   {
      return isLateKill_;
   }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_ || isFirstKill();
   }

   /* implies isKill() */
   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept
   {
      return isFirstKill_;
   }

   constexpr bool isKillBeforeDef() const noexcept
   {
      return isKill() && !isLateKill();
   }

   constexpr bool isFirstKillBeforeDef() const noexcept
   {
      return isFirstKill() && !isLateKill();
   }

   /* Structural equality: same size/fixedness/kill-before-def, and the same
    * literal value, inline-constant register, regclass or temporary. */
   constexpr bool operator == (Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }
private:
   union {
      uint32_t i;
      float f;
      Temp temp = Temp(0, s1); /* also stores the regclass of undefined operands */
   } data_;
   PhysReg reg_; /* doubles as the constant encoding for constants */
   union {
      struct {
         uint8_t isTemp_:1;
         uint8_t isFixed_:1;
         uint8_t isConstant_:1;
         uint8_t isKill_:1;
         uint8_t isUndef_:1;
         uint8_t isFirstKill_:1;
         uint8_t is64BitConst_:1;
         uint8_t isLateKill_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
598
599 /**
600 * Definition Class
601 * Definitions are the results of Instructions
602 * and refer to temporary virtual registers
603 * which are later mapped to physical registers
604 */
605 class Definition final
606 {
607 public:
608 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
609 Definition(uint32_t index, RegClass type) noexcept
610 : temp(index, type) {}
611 explicit Definition(Temp tmp) noexcept
612 : temp(tmp) {}
613 Definition(PhysReg reg, RegClass type) noexcept
614 : temp(Temp(0, type))
615 {
616 setFixed(reg);
617 }
618 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
619 : temp(Temp(tmpId, type))
620 {
621 setFixed(reg);
622 }
623
624 constexpr bool isTemp() const noexcept
625 {
626 return tempId() > 0;
627 }
628
629 constexpr Temp getTemp() const noexcept
630 {
631 return temp;
632 }
633
634 constexpr uint32_t tempId() const noexcept
635 {
636 return temp.id();
637 }
638
639 constexpr void setTemp(Temp t) noexcept {
640 temp = t;
641 }
642
643 constexpr RegClass regClass() const noexcept
644 {
645 return temp.regClass();
646 }
647
648 constexpr unsigned size() const noexcept
649 {
650 return temp.size();
651 }
652
653 constexpr bool isFixed() const noexcept
654 {
655 return isFixed_;
656 }
657
658 constexpr PhysReg physReg() const noexcept
659 {
660 return reg_;
661 }
662
663 constexpr void setFixed(PhysReg reg) noexcept
664 {
665 isFixed_ = 1;
666 reg_ = reg;
667 }
668
669 constexpr void setHint(PhysReg reg) noexcept
670 {
671 hasHint_ = 1;
672 reg_ = reg;
673 }
674
675 constexpr bool hasHint() const noexcept
676 {
677 return hasHint_;
678 }
679
680 constexpr void setKill(bool flag) noexcept
681 {
682 isKill_ = flag;
683 }
684
685 constexpr bool isKill() const noexcept
686 {
687 return isKill_;
688 }
689
690 private:
691 Temp temp = Temp(0, s1);
692 PhysReg reg_;
693 union {
694 struct {
695 uint8_t isFixed_:1;
696 uint8_t hasHint_:1;
697 uint8_t isKill_:1;
698 };
699 /* can't initialize bit-fields in c++11, so work around using a union */
700 uint8_t control_ = 0;
701 };
702 };
703
704 class Block;
705
706 struct Instruction {
707 aco_opcode opcode;
708 Format format;
709 uint32_t pass_flags;
710
711 aco::span<Operand> operands;
712 aco::span<Definition> definitions;
713
714 constexpr bool isVALU() const noexcept
715 {
716 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
717 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
718 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
719 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
720 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
721 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
722 }
723
724 constexpr bool isSALU() const noexcept
725 {
726 return format == Format::SOP1 ||
727 format == Format::SOP2 ||
728 format == Format::SOPC ||
729 format == Format::SOPK ||
730 format == Format::SOPP;
731 }
732
733 constexpr bool isVMEM() const noexcept
734 {
735 return format == Format::MTBUF ||
736 format == Format::MUBUF ||
737 format == Format::MIMG;
738 }
739
740 constexpr bool isDPP() const noexcept
741 {
742 return (uint16_t) format & (uint16_t) Format::DPP;
743 }
744
745 constexpr bool isVOP3() const noexcept
746 {
747 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
748 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
749 format == Format::VOP3P;
750 }
751
752 constexpr bool isSDWA() const noexcept
753 {
754 return (uint16_t) format & (uint16_t) Format::SDWA;
755 }
756
757 constexpr bool isFlatOrGlobal() const noexcept
758 {
759 return format == Format::FLAT || format == Format::GLOBAL;
760 }
761
762 constexpr bool usesModifiers() const noexcept;
763
764 constexpr bool reads_exec() const noexcept
765 {
766 for (const Operand& op : operands) {
767 if (op.isFixed() && op.physReg() == exec)
768 return true;
769 }
770 return false;
771 }
772 };
773
struct SOPK_instruction : public Instruction {
   uint16_t imm; /* the 16-bit inline constant of the SOPK encoding */
};

struct SOPP_instruction : public Instruction {
   uint32_t imm;
   int block; /* target block index for branches — TODO(review): confirm the
               * sentinel used when no block is attached */
};

/* the following SALU formats carry no fields beyond the base Instruction */
struct SOPC_instruction : public Instruction {
};

struct SOP1_instruction : public Instruction {
};

struct SOP2_instruction : public Instruction {
};
791
792 /**
793 * Scalar Memory Format:
794 * For s_(buffer_)load_dword*:
795 * Operand(0): SBASE - SGPR-pair which provides base address
796 * Operand(1): Offset - immediate (un)signed offset or SGPR
797 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
798 * Operand(n-1): SOffset - SGPR offset (Vega only)
799 *
800 * Having no operands is also valid for instructions such as s_dcache_inv.
801 *
802 */
struct SMEM_instruction : public Instruction {
   bool glc : 1; /* VI+: globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool nv : 1; /* VEGA only: Non-volatile */
   bool can_reorder : 1; /* presumably: safe to reorder against other memory ops — confirm with scheduler */
   bool disable_wqm : 1;
   barrier_interaction barrier; /* which barrier kinds this access interacts with */
};
811
/* plain VOP1/VOP2/VOPC carry no fields beyond the base Instruction */
struct VOP1_instruction : public Instruction {
};

struct VOP2_instruction : public Instruction {
};

struct VOPC_instruction : public Instruction {
};

/* VOP3 encoding: per-source input modifiers plus output modifiers */
struct VOP3A_instruction : public Instruction {
   bool abs[3];          /* per-source absolute-value modifier */
   bool neg[3];          /* per-source negate modifier */
   uint8_t opsel : 4;    /* operand select bits — NOTE(review): confirm half-selection semantics against the ISA docs */
   uint8_t omod : 2;     /* output modifier field */
   bool clamp : 1;
};
828
829 /**
830 * Data Parallel Primitives Format:
831 * This format can be used for VOP1, VOP2 or VOPC instructions.
832 * The swizzle applies to the src0 operand.
833 *
834 */
struct DPP_instruction : public Instruction {
   bool abs[2]; /* per-source absolute-value modifier */
   bool neg[2]; /* per-source negate modifier */
   uint16_t dpp_ctrl;      /* hardware DPP control word selecting the swizzle */
   uint8_t row_mask : 4;
   uint8_t bank_mask : 4;
   bool bound_ctrl : 1;
};
843
/* VINTRP: parameter interpolation */
struct Interp_instruction : public Instruction {
   uint8_t attribute; /* attribute index to interpolate */
   uint8_t component; /* component (x/y/z/w) of the attribute */
};
848
849 /**
850 * Local and Global Data Sharing instructions
851 * Operand(0): ADDR - VGPR which supplies the address.
852 * Operand(1): DATA0 - First data VGPR.
853 * Operand(2): DATA1 - Second data VGPR.
854 * Operand(n-1): M0 - LDS size.
855 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
856 *
857 */
struct DS_instruction : public Instruction {
   int16_t offset0; /* NOTE(review): hardware DS offsets are unsigned; confirm
                     * why these are stored signed */
   int8_t offset1;
   bool gds; /* operate on GDS instead of LDS */
};
863
864 /**
865 * Vector Memory Untyped-buffer Instructions
866 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
867 * Operand(1): VADDR - Address source. Can carry an index and/or offset
868 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
869 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
870 *
871 */
struct MUBUF_instruction : public Instruction {
   uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool lds : 1; /* Return read-data to LDS instead of VGPRs */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: safe to reorder against other memory ops — confirm with scheduler */
   barrier_interaction barrier; /* which barrier kinds this access interacts with */
};
886
887 /**
888 * Vector Memory Typed-buffer Instructions
889 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
890 * Operand(1): VADDR - Address source. Can carry an index and/or offset
891 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
892 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
893 *
894 */
struct MTBUF_instruction : public Instruction {
   uint16_t offset; /* Unsigned byte offset - 12 bit. NOTE(review): unlike
                     * MUBUF this is not declared as a 12-bit bit-field —
                     * confirm whether that is intentional */
   uint8_t dfmt : 4; /* Data Format of data in memory buffer */
   uint8_t nfmt : 3; /* Numeric format of data in memory */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: safe to reorder against other memory ops */
   barrier_interaction barrier; /* which barrier kinds this access interacts with */
};
909
910 /**
911 * Vector Memory Image Instructions
912 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
913 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
914 * or VDATA - Vector GPR for write data.
915 * Operand(2): VADDR - Address source. Can carry an offset or an index.
916 * Definition(0): VDATA - Vector GPR for read result.
917 *
918 */
struct MIMG_instruction : public Instruction {
   uint8_t dmask; /* Data VGPR enable mask */
   uint8_t dim : 3; /* NAVI: dimensionality */
   bool unrm : 1; /* Force address to be un-normalized */
   bool dlc : 1; /* NAVI: device level coherent */
   bool glc : 1; /* globally coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool da : 1; /* declare an array */
   bool lwe : 1; /* LOD warning enable — NOTE(review): the original comment
                  * duplicated unrm's text; confirm against the ISA docs */
   bool r128 : 1; /* NAVI: Texture resource size */
   bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1; /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: safe to reorder against other memory ops */
   barrier_interaction barrier; /* which barrier kinds this access interacts with */
};
936
937 /**
938 * Flat/Scratch/Global Instructions
939 * Operand(0): ADDR
940 * Operand(1): SADDR
941 * Operand(2) / Definition(0): DATA/VDST
942 *
943 */
struct FLAT_instruction : public Instruction {
   uint16_t offset; /* Vega/Navi only */
   bool slc : 1; /* system level coherent */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool lds : 1; /* presumably: return read-data to LDS — confirm */
   bool nv : 1;  /* presumably: non-volatile — confirm */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1; /* presumably: safe to reorder against other memory ops */
   barrier_interaction barrier; /* which barrier kinds this access interacts with */
};
955
struct Export_instruction : public Instruction {
   uint8_t enabled_mask; /* mask of enabled components */
   uint8_t dest;         /* export target */
   bool compressed : 1;  /* data is 16-bit compressed */
   bool done : 1;        /* this is the final export of the shader */
   bool valid_mask : 1;
};
963
struct Pseudo_instruction : public Instruction {
   bool tmp_in_scc; /* whether a temporary lives in scc while lowering this */
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
};
968
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];
};
976
/* no fields beyond the base Instruction */
struct Pseudo_barrier_instruction : public Instruction {
};
979
/* operations performed by Pseudo_reduction_instruction; each comes in a
 * 32-bit and 64-bit variant */
enum ReduceOp {
   iadd32, iadd64,
   imul32, imul64,
   fadd32, fadd64,
   fmul32, fmul64,
   imin32, imin64,
   imax32, imax64,
   umin32, umin64,
   umax32, umax64,
   fmin32, fmin64,
   fmax32, fmax64,
   iand32, iand64,
   ior32, ior64,
   ixor32, ixor64,
   gfx10_wave64_bpermute /* not a reduction: GFX10 wave64 bpermute workaround */
};
996
997 /**
998 * Subgroup Reduction Instructions, everything except for the data to be
999 * reduced and the result as inserted by setup_reduce_temp().
1000 * Operand(0): data to be reduced
1001 * Operand(1): reduce temporary
1002 * Operand(2): vector temporary
1003 * Definition(0): result
1004 * Definition(1): scalar temporary
1005 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1006 * Definition(3): scc clobber
1007 * Definition(4): vcc clobber
1008 *
1009 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   unsigned cluster_size; // must be 0 for scans
};
1014
/* Instructions are allocated with calloc() (see create_instruction), so the
 * matching deallocation is free() rather than operator delete. */
struct instr_deleter_functor {
   void operator()(void* ptr)
   {
      free(ptr);
   }
};
1020
/* owning pointer whose deleter calls free(), matching create_instruction's calloc() */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1023
/* Allocates an instruction of derived type T with its Operand and Definition
 * arrays placed directly after it in a single zeroed allocation; the whole
 * chunk is released at once by instr_deleter_functor. */
template<typename T>
T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
{
   std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   char *data = (char*) calloc(1, size);
   T* inst = (T*) data;

   inst->opcode = opcode;
   inst->format = format;

   /* the spans apparently store a 16-bit offset relative to their own
    * address rather than a raw pointer — confirm against aco_util.h */
   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
   inst->operands = aco::span<Operand>(operands_offset, num_operands);
   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);

   return inst;
}
1041
/* Whether this instruction uses any VALU modifiers (DPP, SDWA, abs/neg,
 * opsel, clamp or omod). */
constexpr bool Instruction::usesModifiers() const noexcept
{
   if (isDPP() || isSDWA())
      return true;
   if (!isVOP3())
      return false;
   /* NOTE(review): isVOP3() also matches VOP3P, which is cast to
    * VOP3A_instruction here; additionally abs[]/neg[] have 3 entries while
    * the loop runs over operands.size() — confirm both bounds hold for all
    * instructions reaching this point */
   const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
   for (unsigned i = 0; i < operands.size(); i++) {
      if (vop3->abs[i] || vop3->neg[i])
         return true;
   }
   return vop3->opsel || vop3->clamp || vop3->omod;
}
1055
1056 constexpr bool is_phi(Instruction* instr)
1057 {
1058 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1059 }
1060
1061 static inline bool is_phi(aco_ptr<Instruction>& instr)
1062 {
1063 return is_phi(instr.get());
1064 }
1065
1066 barrier_interaction get_barrier_interaction(Instruction* instr);
1067
1068 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
1069
/* bit flags describing a basic block's role in the CFG */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,   /* divergent branch block */
   block_kind_merge = 1 << 10,   /* re-convergence point */
   block_kind_invert = 1 << 11,  /* else-side of a divergent if */
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
   block_kind_uses_demote = 1 << 14,
   block_kind_export_end = 1 << 15,
};
1090
1091
1092 struct RegisterDemand {
1093 constexpr RegisterDemand() = default;
1094 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1095 : vgpr{v}, sgpr{s} {}
1096 int16_t vgpr = 0;
1097 int16_t sgpr = 0;
1098
1099 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1100 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1101 }
1102
1103 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1104 return vgpr > other.vgpr || sgpr > other.sgpr;
1105 }
1106
1107 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1108 if (t.type() == RegType::sgpr)
1109 return RegisterDemand( vgpr, sgpr + t.size() );
1110 else
1111 return RegisterDemand( vgpr + t.size(), sgpr );
1112 }
1113
1114 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1115 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1116 }
1117
1118 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1119 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1120 }
1121
1122 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1123 vgpr += other.vgpr;
1124 sgpr += other.sgpr;
1125 return *this;
1126 }
1127
1128 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1129 vgpr -= other.vgpr;
1130 sgpr -= other.sgpr;
1131 return *this;
1132 }
1133
1134 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1135 if (t.type() == RegType::sgpr)
1136 sgpr += t.size();
1137 else
1138 vgpr += t.size();
1139 return *this;
1140 }
1141
1142 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1143 if (t.type() == RegType::sgpr)
1144 sgpr -= t.size();
1145 else
1146 vgpr -= t.size();
1147 return *this;
1148 }
1149
1150 constexpr void update(const RegisterDemand other) noexcept {
1151 vgpr = std::max(vgpr, other.vgpr);
1152 sgpr = std::max(sgpr, other.sgpr);
1153 }
1154
1155 };
1156
/* CFG: a basic block of the program. The IR keeps two parallel CFGs, a
 * "logical" one and a "linear" one — presumably logical edges follow the
 * source-level control flow while linear edges follow the machine-level
 * (exec-masked) flow; confirm against the divergence handling passes. */
struct Block {
   float_mode fp_mode;
   unsigned index;                 /* position of this block in Program::blocks */
   unsigned offset = 0;            /* presumably the block's offset in the emitted binary — TODO confirm */
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds;   /* indices of predecessor blocks */
   std::vector<unsigned> linear_preds;
   std::vector<unsigned> logical_succs;   /* indices of successor blocks */
   std::vector<unsigned> linear_succs;
   RegisterDemand register_demand = RegisterDemand(); /* peak register demand within this block */
   uint16_t loop_nest_depth = 0;
   uint16_t kind = 0;              /* bitmask of block_kind flags */
   int logical_idom = -1;          /* immediate dominator block index; -1 if not computed */
   int linear_idom = -1;
   Temp live_out_exec = Temp();

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;
   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */

   Block(unsigned idx) : index(idx) {}
   Block() : index(0) {}
};
1182
/* A shader stage is the combination of the software stage(s) it implements
 * and the hardware stage it runs as; both halves are packed into one
 * bitmask: sw bits occupy [0:6] (sw_mask == 0x7f), hw bits [7:13]. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_gs_copy = 1 << 6;
static constexpr Stage sw_mask = 0x7f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 7;
static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_gs = 1 << 9;
static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_hs = 1 << 11;
static constexpr Stage hw_fs = 1 << 12;
static constexpr Stage hw_cs = 1 << 13;
static constexpr Stage hw_mask = 0x7f << 7;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1226
/* Indices into Program::statistics; see Program::collect_statistics and
 * the collect_*_stats() helpers below. */
enum statistic {
   statistic_hash,
   statistic_instructions,
   statistic_copies,
   statistic_branches,
   statistic_cycles,
   statistic_vmem_clauses,
   statistic_smem_clauses,
   statistic_vmem_score,
   statistic_smem_score,
   statistic_sgpr_presched,
   statistic_vgpr_presched,
   num_statistics /* keep last: sizes the Program::statistics array */
};
1241
/* The compilation unit: the CFG together with the target information and
 * per-program state shared by all compilation passes. */
class Program final {
public:
   float_mode next_fp_mode; /* fp_mode assigned to blocks created from here on */
   std::vector<Block> blocks;
   RegisterDemand max_reg_demand = RegisterDemand();
   uint16_t num_waves = 0;
   uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
   ac_shader_config* config;
   struct radv_shader_info *info;
   enum chip_class chip_class;
   enum radeon_family family;
   unsigned wave_size;
   RegClass lane_mask;
   Stage stage; /* Stage */
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false; /* there exists a p_wqm instruction */
   bool wb_smem_l1_on_end = false;

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   /* register/LDS limits of the target; filled in externally */
   uint16_t min_waves = 0;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   bool has_16bank_lds;
   uint16_t vgpr_limit;
   uint16_t sgpr_limit;
   uint16_t physical_sgprs;
   uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
   uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
   unsigned workgroup_size; /* if known; otherwise UINT_MAX */

   bool xnack_enabled = false;

   bool needs_vcc = false;
   bool needs_flat_scr = false;

   bool collect_statistics = false; /* when set, passes record into statistics[] */
   uint32_t statistics[num_statistics];

   /* Hands out a fresh temporary id and advances the counter.
    * 16777215 == 2^24 - 1: ids must stay within 24 bits, presumably
    * because they are packed into 24-bit fields elsewhere — TODO confirm. */
   uint32_t allocateId()
   {
      assert(allocationID <= 16777215);
      return allocationID++;
   }

   /* Returns the id the next allocateId() call would hand out,
    * without consuming it. */
   uint32_t peekAllocationId()
   {
      return allocationID;
   }

   /* Overrides the id counter; the caller is responsible for not
    * creating duplicate ids. */
   void setAllocationId(uint32_t id)
   {
      allocationID = id;
   }

   /* Appends a new block whose index is its position in 'blocks' and which
    * inherits next_fp_mode. NOTE: the returned pointer is invalidated when
    * 'blocks' reallocates. */
   Block* create_and_insert_block() {
      blocks.emplace_back(blocks.size());
      blocks.back().fp_mode = next_fp_mode;
      return &blocks.back();
   }

   /* Moves an existing block to the end of the CFG, re-indexing it and
    * assigning next_fp_mode. Same pointer-invalidation caveat as above. */
   Block* insert_block(Block&& block) {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   uint32_t allocationID = 1; /* next free temporary id; numbering starts at 1 */
};
1315
/* Result of live_var_analysis(); presumably the outer vectors are indexed
 * by block index — TODO confirm against the analysis pass. */
struct live {
   /* live temps out per block */
   std::vector<std::set<Temp>> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
1322
/* instruction selection: build the ACO IR from NIR */
void select_program(Program *program,
                    unsigned shader_count,
                    struct nir_shader *const *shaders,
                    ac_shader_config* config,
                    struct radv_shader_args *args);
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
                           ac_shader_config* config,
                           struct radv_shader_args *args);

/* compilation passes, roughly in pipeline order */
void lower_wqm(Program* program, live& live_vars,
               const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);
void value_numbering(Program* program);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary,
               unsigned exec_size, std::ostream& out);
void validate(Program* program, FILE *output);
bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
/* NOTE(review): the NDEBUG stub takes (program, cond, msg, ...) while the
 * debug declaration takes (cond, msg, instr) — the stub discards all
 * arguments so calls compile either way, but verify call sites match the
 * debug signature. */
#ifndef NDEBUG
void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#else
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif

/* statistics collection (see enum statistic / Program::statistics) */
void collect_presched_stats(Program *program);
void collect_preasm_stats(Program *program);
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);

/* IR printing for debugging */
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);

/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);

/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1385
/* Per-opcode information tables, each indexed by aco_opcode.
 * The opcode_gfx* arrays presumably hold the hardware encoding of each
 * opcode per GPU generation — TODO confirm against the opcode generator. */
typedef struct {
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char *name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;
1396
1397 extern const Info instr_info;
1398
1399 }
1400
1401 #endif /* ACO_IR_H */
1402