aco: Set +wavefrontsize64 for LLVM disassembler in GFX10 wave64 mode.
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_info;
41
42 namespace aco {
43
/* Global debug option word, defined in another translation unit.
 * Presumably a bitwise OR of the DEBUG_* values below -- confirm at the definition. */
extern uint64_t debug_flags;

/* Single-bit debug options. */
enum {
   DEBUG_VALIDATE = 1 << 0,
   DEBUG_VALIDATE_RA = 1 << 1,
   DEBUG_PERFWARN = 1 << 2,
};
51
/**
 * Microcode encoding format of an instruction.
 *
 * The non-vector formats are plain enumerated values. The Vector ALU formats
 * each occupy their own bit so that they can be OR'ed together:
 * - VOP2* | VOP3A is a VOP2 instruction in VOP3A encoding
 * - VOP2* | DPP is a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA is a VOP2 instruction with sub-dword addressing.
 *
 * (*) VOP1 and VOPC combine in the same way.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,

   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,

   /* Scalar Memory Format */
   SMEM = 6,

   /* LDS/GDS Format (the value 7 is not used) */
   DS = 8,

   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,

   /* Vector Memory Image Format */
   MIMG = 11,

   /* Export Format */
   EXP = 12,

   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   /* Further pseudo formats */
   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats: one bit each, starting at bit 8 */
   VOP1 = 0x0100,
   VOP2 = 0x0200,
   VOPC = 0x0400,
   /* VOP3, VOP3A and VOP3B deliberately share a single bit */
   VOP3 = 0x0800,
   VOP3A = 0x0800,
   VOP3B = 0x0800,
   VOP3P = 0x1000,
   /* Vector Parameter Interpolation Format */
   VINTRP = 0x2000,
   DPP = 0x4000,
   SDWA = 0x8000,
};
103
/* Kinds of memory a barrier can interact with, one bit per kind. */
enum barrier_interaction {
   barrier_none = 0,
   barrier_buffer = 1 << 0,
   barrier_image = 1 << 1,
   barrier_atomic = 1 << 2,
   barrier_shared = 1 << 3,
   /* number of distinct single-bit kinds above */
   barrier_count = 4,
};
112
113 constexpr Format asVOP3(Format format) {
114 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
115 };
116
/* Register file a value lives in. */
enum class RegType {
   none = 0,
   sgpr = 1,
   vgpr = 2,
   linear_vgpr = 3,
};
123
124 struct RegClass {
125
126 enum RC : uint8_t {
127 s1 = 1,
128 s2 = 2,
129 s3 = 3,
130 s4 = 4,
131 s6 = 6,
132 s8 = 8,
133 s16 = 16,
134 v1 = s1 | (1 << 5),
135 v2 = s2 | (1 << 5),
136 v3 = s3 | (1 << 5),
137 v4 = s4 | (1 << 5),
138 v5 = 5 | (1 << 5),
139 v6 = 6 | (1 << 5),
140 v7 = 7 | (1 << 5),
141 v8 = 8 | (1 << 5),
142 /* these are used for WWM and spills to vgpr */
143 v1_linear = v1 | (1 << 6),
144 v2_linear = v2 | (1 << 6),
145 };
146
147 RegClass() = default;
148 constexpr RegClass(RC rc)
149 : rc(rc) {}
150 constexpr RegClass(RegType type, unsigned size)
151 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
152
153 constexpr operator RC() const { return rc; }
154 explicit operator bool() = delete;
155
156 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
157 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
158 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
159 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
160
161 private:
162 RC rc;
163 };
164
165 /* transitional helper expressions */
166 static constexpr RegClass s1{RegClass::s1};
167 static constexpr RegClass s2{RegClass::s2};
168 static constexpr RegClass s3{RegClass::s3};
169 static constexpr RegClass s4{RegClass::s4};
170 static constexpr RegClass s8{RegClass::s8};
171 static constexpr RegClass s16{RegClass::s16};
172 static constexpr RegClass v1{RegClass::v1};
173 static constexpr RegClass v2{RegClass::v2};
174 static constexpr RegClass v3{RegClass::v3};
175 static constexpr RegClass v4{RegClass::v4};
176 static constexpr RegClass v5{RegClass::v5};
177 static constexpr RegClass v6{RegClass::v6};
178 static constexpr RegClass v7{RegClass::v7};
179 static constexpr RegClass v8{RegClass::v8};
180
181 /**
182 * Temp Class
183 * Each temporary virtual register has a
184 * register class (i.e. size and type)
185 * and SSA id.
186 */
187 struct Temp {
188 Temp() = default;
189 constexpr Temp(uint32_t id, RegClass cls) noexcept
190 : id_(id), reg_class(cls) {}
191
192 constexpr uint32_t id() const noexcept { return id_; }
193 constexpr RegClass regClass() const noexcept { return reg_class; }
194
195 constexpr unsigned size() const noexcept { return reg_class.size(); }
196 constexpr RegType type() const noexcept { return reg_class.type(); }
197 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
198
199 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
200 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
201 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
202
203 private:
204 uint32_t id_:24;
205 RegClass reg_class;
206 };
207
/**
 * PhysReg
 * The physical register assigned to an Operand or Definition,
 * stored as a 16-bit register number.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   /* The value is truncated to the 16 bits of `reg`. */
   explicit constexpr PhysReg(unsigned r) : reg(static_cast<uint16_t>(r)) {}
   constexpr operator unsigned() const { return reg; }

   uint16_t reg = 0;
};
220
221 /* helper expressions for special registers */
222 static constexpr PhysReg m0{124};
223 static constexpr PhysReg vcc{106};
224 static constexpr PhysReg exec{126};
225 static constexpr PhysReg exec_lo{126};
226 static constexpr PhysReg exec_hi{127};
227 static constexpr PhysReg scc{253};
228
229 /**
230 * Operand Class
231 * Initially, each Operand refers to either
232 * a temporary virtual register
233 * or to a constant value
234 * Temporary registers get mapped to physical register during RA
235 * Constant values are inlined into the instruction sequence.
236 */
237 class Operand final
238 {
239 public:
240 constexpr Operand()
241 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
242 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
243
244 explicit Operand(Temp r) noexcept
245 {
246 data_.temp = r;
247 if (r.id()) {
248 isTemp_ = true;
249 } else {
250 isUndef_ = true;
251 setFixed(PhysReg{128});
252 }
253 };
254 explicit Operand(uint32_t v) noexcept
255 {
256 data_.i = v;
257 isConstant_ = true;
258 if (v <= 64)
259 setFixed(PhysReg{128 + v});
260 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
261 setFixed(PhysReg{192 - v});
262 else if (v == 0x3f000000) /* 0.5 */
263 setFixed(PhysReg{240});
264 else if (v == 0xbf000000) /* -0.5 */
265 setFixed(PhysReg{241});
266 else if (v == 0x3f800000) /* 1.0 */
267 setFixed(PhysReg{242});
268 else if (v == 0xbf800000) /* -1.0 */
269 setFixed(PhysReg{243});
270 else if (v == 0x40000000) /* 2.0 */
271 setFixed(PhysReg{244});
272 else if (v == 0xc0000000) /* -2.0 */
273 setFixed(PhysReg{245});
274 else if (v == 0x40800000) /* 4.0 */
275 setFixed(PhysReg{246});
276 else if (v == 0xc0800000) /* -4.0 */
277 setFixed(PhysReg{247});
278 else if (v == 0x3e22f983) /* 1/(2*PI) */
279 setFixed(PhysReg{248});
280 else /* Literal Constant */
281 setFixed(PhysReg{255});
282 };
283 explicit Operand(uint64_t v) noexcept
284 {
285 isConstant_ = true;
286 is64BitConst_ = true;
287 if (v <= 64)
288 setFixed(PhysReg{128 + (uint32_t) v});
289 else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
290 setFixed(PhysReg{192 - (uint32_t) v});
291 else if (v == 0x3FE0000000000000) /* 0.5 */
292 setFixed(PhysReg{240});
293 else if (v == 0xBFE0000000000000) /* -0.5 */
294 setFixed(PhysReg{241});
295 else if (v == 0x3FF0000000000000) /* 1.0 */
296 setFixed(PhysReg{242});
297 else if (v == 0xBFF0000000000000) /* -1.0 */
298 setFixed(PhysReg{243});
299 else if (v == 0x4000000000000000) /* 2.0 */
300 setFixed(PhysReg{244});
301 else if (v == 0xC000000000000000) /* -2.0 */
302 setFixed(PhysReg{245});
303 else if (v == 0x4010000000000000) /* 4.0 */
304 setFixed(PhysReg{246});
305 else if (v == 0xC010000000000000) /* -4.0 */
306 setFixed(PhysReg{247});
307 else if (v == 0x3fc45f306dc9c882) /* 1/(2*PI) */
308 setFixed(PhysReg{248});
309 else { /* Literal Constant: we don't know if it is a long or double.*/
310 isConstant_ = 0;
311 assert(false && "attempt to create a 64-bit literal constant");
312 }
313 };
314 explicit Operand(RegClass type) noexcept
315 {
316 isUndef_ = true;
317 data_.temp = Temp(0, type);
318 setFixed(PhysReg{128});
319 };
320 explicit Operand(PhysReg reg, RegClass type) noexcept
321 {
322 data_.temp = Temp(0, type);
323 setFixed(reg);
324 }
325
326 constexpr bool isTemp() const noexcept
327 {
328 return isTemp_;
329 }
330
331 constexpr void setTemp(Temp t) noexcept {
332 assert(!isConstant_);
333 isTemp_ = true;
334 data_.temp = t;
335 }
336
337 constexpr Temp getTemp() const noexcept
338 {
339 return data_.temp;
340 }
341
342 constexpr uint32_t tempId() const noexcept
343 {
344 return data_.temp.id();
345 }
346
347 constexpr bool hasRegClass() const noexcept
348 {
349 return isTemp() || isUndefined();
350 }
351
352 constexpr RegClass regClass() const noexcept
353 {
354 return data_.temp.regClass();
355 }
356
357 constexpr unsigned size() const noexcept
358 {
359 if (isConstant())
360 return is64BitConst_ ? 2 : 1;
361 else
362 return data_.temp.size();
363 }
364
365 constexpr bool isFixed() const noexcept
366 {
367 return isFixed_;
368 }
369
370 constexpr PhysReg physReg() const noexcept
371 {
372 return reg_;
373 }
374
375 constexpr void setFixed(PhysReg reg) noexcept
376 {
377 isFixed_ = reg != unsigned(-1);
378 reg_ = reg;
379 }
380
381 constexpr bool isConstant() const noexcept
382 {
383 return isConstant_;
384 }
385
386 constexpr bool isLiteral() const noexcept
387 {
388 return isConstant() && reg_ == 255;
389 }
390
391 constexpr bool isUndefined() const noexcept
392 {
393 return isUndef_;
394 }
395
396 constexpr uint32_t constantValue() const noexcept
397 {
398 return data_.i;
399 }
400
401 constexpr bool constantEquals(uint32_t cmp) const noexcept
402 {
403 return isConstant() && constantValue() == cmp;
404 }
405
406 constexpr void setKill(bool flag) noexcept
407 {
408 isKill_ = flag;
409 if (!flag)
410 setFirstKill(false);
411 }
412
413 constexpr bool isKill() const noexcept
414 {
415 return isKill_ || isFirstKill();
416 }
417
418 constexpr void setFirstKill(bool flag) noexcept
419 {
420 isFirstKill_ = flag;
421 if (flag)
422 setKill(flag);
423 }
424
425 /* When there are multiple operands killing the same temporary,
426 * isFirstKill() is only returns true for the first one. */
427 constexpr bool isFirstKill() const noexcept
428 {
429 return isFirstKill_;
430 }
431
432 private:
433 union {
434 uint32_t i;
435 float f;
436 Temp temp = Temp(0, s1);
437 } data_;
438 PhysReg reg_;
439 union {
440 struct {
441 uint8_t isTemp_:1;
442 uint8_t isFixed_:1;
443 uint8_t isConstant_:1;
444 uint8_t isKill_:1;
445 uint8_t isUndef_:1;
446 uint8_t isFirstKill_:1;
447 uint8_t is64BitConst_:1;
448 };
449 /* can't initialize bit-fields in c++11, so work around using a union */
450 uint8_t control_ = 0;
451 };
452 };
453
454 /**
455 * Definition Class
456 * Definitions are the results of Instructions
457 * and refer to temporary virtual registers
458 * which are later mapped to physical registers
459 */
460 class Definition final
461 {
462 public:
463 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
464 Definition(uint32_t index, RegClass type) noexcept
465 : temp(index, type) {}
466 explicit Definition(Temp tmp) noexcept
467 : temp(tmp) {}
468 Definition(PhysReg reg, RegClass type) noexcept
469 : temp(Temp(0, type))
470 {
471 setFixed(reg);
472 }
473 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
474 : temp(Temp(tmpId, type))
475 {
476 setFixed(reg);
477 }
478
479 constexpr bool isTemp() const noexcept
480 {
481 return tempId() > 0;
482 }
483
484 constexpr Temp getTemp() const noexcept
485 {
486 return temp;
487 }
488
489 constexpr uint32_t tempId() const noexcept
490 {
491 return temp.id();
492 }
493
494 constexpr void setTemp(Temp t) noexcept {
495 temp = t;
496 }
497
498 constexpr RegClass regClass() const noexcept
499 {
500 return temp.regClass();
501 }
502
503 constexpr unsigned size() const noexcept
504 {
505 return temp.size();
506 }
507
508 constexpr bool isFixed() const noexcept
509 {
510 return isFixed_;
511 }
512
513 constexpr PhysReg physReg() const noexcept
514 {
515 return reg_;
516 }
517
518 constexpr void setFixed(PhysReg reg) noexcept
519 {
520 isFixed_ = 1;
521 reg_ = reg;
522 }
523
524 constexpr void setHint(PhysReg reg) noexcept
525 {
526 hasHint_ = 1;
527 reg_ = reg;
528 }
529
530 constexpr bool hasHint() const noexcept
531 {
532 return hasHint_;
533 }
534
535 constexpr void setKill(bool flag) noexcept
536 {
537 isKill_ = flag;
538 }
539
540 constexpr bool isKill() const noexcept
541 {
542 return isKill_;
543 }
544
545 private:
546 Temp temp = Temp(0, s1);
547 PhysReg reg_;
548 union {
549 struct {
550 uint8_t isFixed_:1;
551 uint8_t hasHint_:1;
552 uint8_t isKill_:1;
553 };
554 /* can't initialize bit-fields in c++11, so work around using a union */
555 uint8_t control_ = 0;
556 };
557 };
558
/* Forward declaration; uses the same class-key (struct) as the definition of Block. */
struct Block;
560
561 struct Instruction {
562 aco_opcode opcode;
563 Format format;
564 uint32_t pass_flags;
565
566 aco::span<Operand> operands;
567 aco::span<Definition> definitions;
568
569 constexpr bool isVALU() const noexcept
570 {
571 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
572 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
573 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
574 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
575 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
576 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
577 }
578
579 constexpr bool isSALU() const noexcept
580 {
581 return format == Format::SOP1 ||
582 format == Format::SOP2 ||
583 format == Format::SOPC ||
584 format == Format::SOPK ||
585 format == Format::SOPP;
586 }
587
588 constexpr bool isVMEM() const noexcept
589 {
590 return format == Format::MTBUF ||
591 format == Format::MUBUF ||
592 format == Format::MIMG;
593 }
594
595 constexpr bool isDPP() const noexcept
596 {
597 return (uint16_t) format & (uint16_t) Format::DPP;
598 }
599
600 constexpr bool isVOP3() const noexcept
601 {
602 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
603 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
604 format == Format::VOP3P;
605 }
606
607 constexpr bool isSDWA() const noexcept
608 {
609 return (uint16_t) format & (uint16_t) Format::SDWA;
610 }
611
612 constexpr bool isFlatOrGlobal() const noexcept
613 {
614 return format == Format::FLAT || format == Format::GLOBAL;
615 }
616 };
617
618 struct SOPK_instruction : public Instruction {
619 uint16_t imm;
620 };
621
622 struct SOPP_instruction : public Instruction {
623 uint32_t imm;
624 int block;
625 };
626
627 struct SOPC_instruction : public Instruction {
628 };
629
630 struct SOP1_instruction : public Instruction {
631 };
632
633 struct SOP2_instruction : public Instruction {
634 };
635
636 /**
637 * Scalar Memory Format:
638 * For s_(buffer_)load_dword*:
639 * Operand(0): SBASE - SGPR-pair which provides base address
640 * Operand(1): Offset - immediate (un)signed offset or SGPR
641 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
642 * Operand(n-1): SOffset - SGPR offset (Vega only)
643 *
644 * Having no operands is also valid for instructions such as s_dcache_inv.
645 *
646 */
647 struct SMEM_instruction : public Instruction {
648 bool glc; /* VI+: globally coherent */
649 bool dlc; /* NAVI: device level coherent */
650 bool nv; /* VEGA only: Non-volatile */
651 bool can_reorder;
652 bool disable_wqm;
653 barrier_interaction barrier;
654 };
655
656 struct VOP1_instruction : public Instruction {
657 };
658
659 struct VOP2_instruction : public Instruction {
660 };
661
662 struct VOPC_instruction : public Instruction {
663 };
664
665 struct VOP3A_instruction : public Instruction {
666 bool abs[3];
667 bool opsel[3];
668 bool clamp;
669 unsigned omod;
670 bool neg[3];
671 };
672
673 /**
674 * Data Parallel Primitives Format:
675 * This format can be used for VOP1, VOP2 or VOPC instructions.
676 * The swizzle applies to the src0 operand.
677 *
678 */
679 struct DPP_instruction : public Instruction {
680 uint16_t dpp_ctrl;
681 uint8_t row_mask;
682 uint8_t bank_mask;
683 bool abs[2];
684 bool neg[2];
685 bool bound_ctrl;
686 };
687
688 struct Interp_instruction : public Instruction {
689 unsigned attribute;
690 unsigned component;
691 };
692
693 /**
694 * Local and Global Data Sharing instructions
695 * Operand(0): ADDR - VGPR which supplies the address.
696 * Operand(1): DATA0 - First data VGPR.
697 * Operand(2): DATA1 - Second data VGPR.
698 * Operand(n-1): M0 - LDS size.
699 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
700 *
701 */
702 struct DS_instruction : public Instruction {
703 int16_t offset0;
704 int8_t offset1;
705 bool gds;
706 };
707
708 /**
709 * Vector Memory Untyped-buffer Instructions
710 * Operand(0): VADDR - Address source. Can carry an index and/or offset
711 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
712 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
713 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
714 *
715 */
716 struct MUBUF_instruction : public Instruction {
717 unsigned offset; /* Unsigned byte offset - 12 bit */
718 bool offen; /* Supply an offset from VGPR (VADDR) */
719 bool idxen; /* Supply an index from VGPR (VADDR) */
720 bool glc; /* globally coherent */
721 bool dlc; /* NAVI: device level coherent */
722 bool slc; /* system level coherent */
723 bool tfe; /* texture fail enable */
724 bool lds; /* Return read-data to LDS instead of VGPRs */
725 bool disable_wqm; /* Require an exec mask without helper invocations */
726 bool can_reorder;
727 barrier_interaction barrier;
728 };
729
730 /**
731 * Vector Memory Typed-buffer Instructions
732 * Operand(0): VADDR - Address source. Can carry an index and/or offset
733 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
734 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
735 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
736 *
737 */
738 struct MTBUF_instruction : public Instruction {
739 union {
740 struct {
741 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
742 uint8_t nfmt : 3; /* Numeric format of data in memory */
743 };
744 uint8_t img_format; /* Buffer or image format as used by GFX10 */
745 };
746 unsigned offset; /* Unsigned byte offset - 12 bit */
747 bool offen; /* Supply an offset from VGPR (VADDR) */
748 bool idxen; /* Supply an index from VGPR (VADDR) */
749 bool glc; /* globally coherent */
750 bool dlc; /* NAVI: device level coherent */
751 bool slc; /* system level coherent */
752 bool tfe; /* texture fail enable */
753 bool disable_wqm; /* Require an exec mask without helper invocations */
754 bool can_reorder;
755 barrier_interaction barrier;
756 };
757
758 /**
759 * Vector Memory Image Instructions
760 * Operand(0): VADDR - Address source. Can carry an offset or an index.
761 * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
762 * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
763 * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
764 *
765 */
766 struct MIMG_instruction : public Instruction {
767 unsigned dmask; /* Data VGPR enable mask */
768 bool unrm; /* Force address to be un-normalized */
769 bool dlc; /* NAVI: device level coherent */
770 bool glc; /* globally coherent */
771 bool slc; /* system level coherent */
772 bool tfe; /* texture fail enable */
773 bool da; /* declare an array */
774 bool lwe; /* Force data to be un-normalized */
775 bool r128; /* NAVI: Texture resource size */
776 bool a16; /* VEGA, NAVI: Address components are 16-bits */
777 bool d16; /* Convert 32-bit data to 16-bit data */
778 bool disable_wqm; /* Require an exec mask without helper invocations */
779 bool can_reorder;
780 barrier_interaction barrier;
781 };
782
783 /**
784 * Flat/Scratch/Global Instructions
785 * Operand(0): ADDR
786 * Operand(1): SADDR
787 * Operand(2) / Definition(0): DATA/VDST
788 *
789 */
790 struct FLAT_instruction : public Instruction {
791 uint16_t offset; /* Vega only */
792 bool slc;
793 bool glc;
794 bool lds;
795 bool nv;
796 };
797
798 struct Export_instruction : public Instruction {
799 unsigned enabled_mask;
800 unsigned dest;
801 bool compressed;
802 bool done;
803 bool valid_mask;
804 };
805
806 struct Pseudo_instruction : public Instruction {
807 bool tmp_in_scc;
808 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
809 };
810
811 struct Pseudo_branch_instruction : public Instruction {
812 /* target[0] is the block index of the branch target.
813 * For conditional branches, target[1] contains the fall-through alternative.
814 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
815 */
816 uint32_t target[2];
817 };
818
819 struct Pseudo_barrier_instruction : public Instruction {
820 };
821
/* Reduction operations; each comes in a 32-bit and a 64-bit flavour,
 * numbered consecutively from 0. */
enum ReduceOp {
   iadd32,
   iadd64,
   imul32,
   imul64,
   fadd32,
   fadd64,
   fmul32,
   fmul64,
   imin32,
   imin64,
   imax32,
   imax64,
   umin32,
   umin64,
   umax32,
   umax64,
   fmin32,
   fmin64,
   fmax32,
   fmax64,
   iand32,
   iand64,
   ior32,
   ior64,
   ixor32,
   ixor64,
};
837
838 /**
839 * Subgroup Reduction Instructions, everything except for the data to be
840 * reduced and the result as inserted by setup_reduce_temp().
841 * Operand(0): data to be reduced
842 * Operand(1): reduce temporary
843 * Operand(2): vector temporary
844 * Definition(0): result
845 * Definition(1): scalar temporary
846 * Definition(2): scalar identity temporary
847 * Definition(3): scc clobber
848 * Definition(4): vcc clobber
849 *
850 */
851 struct Pseudo_reduction_instruction : public Instruction {
852 ReduceOp reduce_op;
853 unsigned cluster_size; // must be 0 for scans
854 };
855
/* Deleter that releases memory with free(); no destructor is run. */
struct instr_deleter_functor {
   void operator()(void* ptr) { free(ptr); }
};

/* Owning pointer whose pointee is released through instr_deleter_functor. */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
864
865 template<typename T>
866 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
867 {
868 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
869 char *data = (char*) calloc(1, size);
870 T* inst = (T*) data;
871
872 inst->opcode = opcode;
873 inst->format = format;
874
875 inst->operands = aco::span<Operand>((Operand*)(data + sizeof(T)), num_operands);
876 inst->definitions = aco::span<Definition>((Definition*)inst->operands.end(), num_definitions);
877
878 return inst;
879 }
880
881 constexpr bool is_phi(Instruction* instr)
882 {
883 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
884 }
885
886 static inline bool is_phi(aco_ptr<Instruction>& instr)
887 {
888 return is_phi(instr.get());
889 }
890
891 constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
892 {
893 switch (instr->format) {
894 case Format::SMEM:
895 return static_cast<SMEM_instruction*>(instr)->barrier;
896 case Format::MUBUF:
897 return static_cast<MUBUF_instruction*>(instr)->barrier;
898 case Format::MIMG:
899 return static_cast<MIMG_instruction*>(instr)->barrier;
900 case Format::FLAT:
901 case Format::GLOBAL:
902 return barrier_buffer;
903 case Format::DS:
904 return barrier_shared;
905 default:
906 return barrier_none;
907 }
908 }
909
/* Properties of a block, one bit each. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all active lanes stay active */
   block_kind_uniform = 0x0001,
   block_kind_top_level = 0x0002,
   block_kind_loop_preheader = 0x0004,
   block_kind_loop_header = 0x0008,
   block_kind_loop_exit = 0x0010,
   block_kind_continue = 0x0020,
   block_kind_break = 0x0040,
   block_kind_continue_or_break = 0x0080,
   block_kind_discard = 0x0100,
   block_kind_branch = 0x0200,
   block_kind_merge = 0x0400,
   block_kind_invert = 0x0800,
   block_kind_uses_discard_if = 0x1000,
   block_kind_needs_lowering = 0x2000,
   block_kind_uses_demote = 0x4000,
};
929
930
931 struct RegisterDemand {
932 constexpr RegisterDemand() = default;
933 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
934 : vgpr{v}, sgpr{s} {}
935 int16_t vgpr = 0;
936 int16_t sgpr = 0;
937
938 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
939 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
940 }
941
942 constexpr bool exceeds(const RegisterDemand other) const noexcept {
943 return vgpr > other.vgpr || sgpr > other.sgpr;
944 }
945
946 constexpr RegisterDemand operator+(const Temp t) const noexcept {
947 if (t.type() == RegType::sgpr)
948 return RegisterDemand( vgpr, sgpr + t.size() );
949 else
950 return RegisterDemand( vgpr + t.size(), sgpr );
951 }
952
953 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
954 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
955 }
956
957 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
958 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
959 }
960
961 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
962 vgpr += other.vgpr;
963 sgpr += other.sgpr;
964 return *this;
965 }
966
967 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
968 vgpr -= other.vgpr;
969 sgpr -= other.sgpr;
970 return *this;
971 }
972
973 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
974 if (t.type() == RegType::sgpr)
975 sgpr += t.size();
976 else
977 vgpr += t.size();
978 return *this;
979 }
980
981 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
982 if (t.type() == RegType::sgpr)
983 sgpr -= t.size();
984 else
985 vgpr -= t.size();
986 return *this;
987 }
988
989 constexpr void update(const RegisterDemand other) noexcept {
990 vgpr = std::max(vgpr, other.vgpr);
991 sgpr = std::max(sgpr, other.sgpr);
992 }
993
994 };
995
996 /* CFG */
997 struct Block {
998 unsigned index;
999 unsigned offset = 0;
1000 std::vector<aco_ptr<Instruction>> instructions;
1001 std::vector<unsigned> logical_preds;
1002 std::vector<unsigned> linear_preds;
1003 std::vector<unsigned> logical_succs;
1004 std::vector<unsigned> linear_succs;
1005 RegisterDemand register_demand = RegisterDemand();
1006 uint16_t loop_nest_depth = 0;
1007 uint16_t kind = 0;
1008 int logical_idom = -1;
1009 int linear_idom = -1;
1010 Temp live_out_exec = Temp();
1011
1012 /* this information is needed for predecessors to blocks with phis when
1013 * moving out of ssa */
1014 bool scc_live_out = false;
1015 PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1016
1017 Block(unsigned idx) : index(idx) {}
1018 Block() : index(0) {}
1019 };
1020
/* A Stage combines one or more software stage bits with exactly one
 * hardware stage bit. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_mask = 0x3f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 6;
static constexpr Stage hw_es = 1 << 7;
static constexpr Stage hw_gs = 1 << 8; /* not on GFX9. combined into ES on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_ls = 1 << 9;
static constexpr Stage hw_hs = 1 << 10; /* not on GFX9. combined into LS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_fs = 1 << 11;
static constexpr Stage hw_cs = 1 << 12;
static constexpr Stage hw_mask = 0x7f << 6;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_es = sw_vs | sw_gs | hw_es;
static constexpr Stage vertex_tess_control_ls = sw_vs | sw_tcs | hw_ls;
static constexpr Stage tess_eval_geometry_es = sw_tes | sw_gs | hw_es;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
/* The "_es" variant runs on the ES hardware stage, matching its name
 * (it was previously built from hw_gs). */
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before GS */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1061
1062 class Program final {
1063 public:
1064 std::vector<Block> blocks;
1065 RegisterDemand max_reg_demand = RegisterDemand();
1066 uint16_t sgpr_limit = 0;
1067 uint16_t num_waves = 0;
1068 ac_shader_config* config;
1069 struct radv_shader_info *info;
1070 enum chip_class chip_class;
1071 enum radeon_family family;
1072 unsigned wave_size;
1073 Stage stage; /* Stage */
1074 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1075 bool needs_wqm = false; /* there exists a p_wqm instruction */
1076 bool wb_smem_l1_on_end = false;
1077
1078 std::vector<uint8_t> constant_data;
1079
1080 uint32_t allocateId()
1081 {
1082 assert(allocationID <= 16777215);
1083 return allocationID++;
1084 }
1085
1086 uint32_t peekAllocationId()
1087 {
1088 return allocationID;
1089 }
1090
1091 void setAllocationId(uint32_t id)
1092 {
1093 allocationID = id;
1094 }
1095
1096 Block* create_and_insert_block() {
1097 blocks.emplace_back(blocks.size());
1098 return &blocks.back();
1099 }
1100
1101 Block* insert_block(Block&& block) {
1102 block.index = blocks.size();
1103 blocks.emplace_back(std::move(block));
1104 return &blocks.back();
1105 }
1106
1107 private:
1108 uint32_t allocationID = 1;
1109 };
1110
1111 struct live {
1112 /* live temps out per block */
1113 std::vector<std::set<Temp>> live_out;
1114 /* register demand (sgpr/vgpr) per instruction per block */
1115 std::vector<std::vector<RegisterDemand>> register_demand;
1116 };
1117
1118 void select_program(Program *program,
1119 unsigned shader_count,
1120 struct nir_shader *const *shaders,
1121 ac_shader_config* config,
1122 struct radv_shader_info *info,
1123 struct radv_nir_compiler_options *options);
1124
1125 void lower_wqm(Program* program, live& live_vars,
1126 const struct radv_nir_compiler_options *options);
1127 void lower_bool_phis(Program* program);
1128 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1129 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1130 std::vector<uint16_t> dead_code_analysis(Program *program);
1131 void dominator_tree(Program* program);
1132 void insert_exec_mask(Program *program);
1133 void value_numbering(Program* program);
1134 void optimize(Program* program);
1135 void setup_reduce_temp(Program* program);
1136 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1137 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1138 void ssa_elimination(Program* program);
1139 void lower_to_hw_instr(Program* program);
1140 void schedule_program(Program* program, live& live_vars);
1141 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1142 void insert_wait_states(Program* program);
1143 void insert_NOPs(Program* program);
1144 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1145 void print_asm(Program *program, std::vector<uint32_t>& binary,
1146 unsigned exec_size, std::ostream& out);
1147 void validate(Program* program, FILE *output);
1148 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1149 #ifndef NDEBUG
1150 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1151 #else
1152 #define perfwarn(program, cond, msg, ...)
1153 #endif
1154
1155 void aco_print_instr(Instruction *instr, FILE *output);
1156 void aco_print_program(Program *program, FILE *output);
1157
1158 typedef struct {
1159 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1160 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1161 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1162 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1163 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1164 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1165 } Info;
1166
1167 extern const Info instr_info;
1168
1169 }
1170
1171 #endif /* ACO_IR_H */
1172