aco: Initial commit of independent AMD compiler
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_info;
41
42 namespace aco {
43
/* Global mask of enabled debug features, tested against the DEBUG_*
 * bits below (storage is defined elsewhere). */
extern uint64_t debug_flags;

enum {
   DEBUG_VALIDATE = 0x1,    /* run validate() on the IR */
   DEBUG_VALIDATE_RA = 0x2, /* run validate_ra() after register allocation */
   DEBUG_PERFWARN = 0x4,    /* enable perfwarn() performance warnings */
};
51
/**
 * Representation of the instruction's microcode encoding format
 * Note: Some Vector ALU Formats can be combined, such that:
 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
 *
 * (*) The same is applicable for VOP1 and VOPC instructions.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,
   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,
   /* Scalar Memory Format */
   SMEM = 6,
   /* LDS/GDS Format */
   DS = 8,
   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,
   /* Vector Memory Image Format */
   MIMG = 11,
   /* Export Format */
   EXP = 12,
   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats.
    * Each occupies a single bit in the high byte so that they can be
    * OR'd with each other and with the low-byte formats above.
    * VOP3A/VOP3B intentionally alias VOP3: all share encoding bit 11. */
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
   VOP3 = 1 << 11,
   VOP3A = 1 << 11,
   VOP3B = 1 << 11,
   VOP3P = 1 << 12,
   /* Vector Parameter Interpolation Format */
   VINTRP = 1 << 13,
   DPP = 1 << 14,
   SDWA = 1 << 15,
};
103
/* Classes of memory an instruction may interact with; queried via
 * get_barrier_interaction() below and stored on memory instructions. */
enum barrier_interaction {
   barrier_none = 0,
   barrier_buffer = 0x1,
   barrier_image = 0x2,
   barrier_atomic = 0x4,
   barrier_shared = 0x8,
   /* number of flag bits above — not itself a flag */
   barrier_count = 4,
};
112
113 constexpr Format asVOP3(Format format) {
114 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
115 };
116
/* Which register file a value lives in. */
enum class RegType {
   none = 0,
   sgpr,        /* scalar GPR */
   vgpr,        /* vector GPR */
   linear_vgpr, /* VGPR treated as linear — used for WWM and spills (see RegClass) */
};
123
/* Register class: packs the register file and the size of a value
 * into one byte:
 *   bits [4:0] - size in dwords
 *   bit  5     - set for vgpr classes
 *   bit  6     - set for linear vgpr classes (WWM / spills to vgpr)
 * sgpr classes are always considered linear (see is_linear()). */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc)
      : rc(rc) {}
   /* Build a class from a type and a size in dwords. */
   constexpr RegClass(RegType type, unsigned size)
      : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}

   constexpr operator RC() const { return rc; }
   /* deleted to avoid accidental truthiness tests on a register class */
   explicit operator bool() = delete;

   /* linear_vgpr classes also report RegType::vgpr here */
   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
   /* size in dwords */
   constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
   /* sgprs are always linear; vgprs only if bit 6 is set */
   constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
   constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }

private:
   RC rc;
};
164
165 /* transitional helper expressions */
166 static constexpr RegClass s1{RegClass::s1};
167 static constexpr RegClass s2{RegClass::s2};
168 static constexpr RegClass s3{RegClass::s3};
169 static constexpr RegClass s4{RegClass::s4};
170 static constexpr RegClass s8{RegClass::s8};
171 static constexpr RegClass s16{RegClass::s16};
172 static constexpr RegClass v1{RegClass::v1};
173 static constexpr RegClass v2{RegClass::v2};
174 static constexpr RegClass v3{RegClass::v3};
175 static constexpr RegClass v4{RegClass::v4};
176 static constexpr RegClass v5{RegClass::v5};
177 static constexpr RegClass v6{RegClass::v6};
178 static constexpr RegClass v7{RegClass::v7};
179 static constexpr RegClass v8{RegClass::v8};
180
/**
 * Temp Class
 * Each temporary virtual register has a
 * register class (i.e. size and type)
 * and SSA id.
 */
struct Temp {
   Temp() = default;
   constexpr Temp(uint32_t id, RegClass cls) noexcept
      : id_(id), reg_class(cls) {}

   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return reg_class; }

   /* size in dwords */
   constexpr unsigned size() const noexcept { return reg_class.size(); }
   constexpr RegType type() const noexcept { return reg_class.type(); }
   constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }

   /* comparisons are by SSA id only — the register class is ignored */
   constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   /* 24-bit id: matches the 16777215 cap asserted in Program::allocateId() */
   uint32_t id_:24;
   RegClass reg_class;
};
207
/**
 * PhysReg
 * Represents the physical register for each
 * Operand and Definition.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg(r) {}
   /* implicit conversion so PhysReg can be compared/used as a plain index */
   constexpr operator unsigned() const { return reg; }

   uint16_t reg = 0;
};
220
/* helper expressions for special registers (values follow the
 * hardware's scalar operand numbering) */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126}; /* low half of exec — same slot as exec */
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg scc{253};
228
/**
 * Operand Class
 * Initially, each Operand refers to either
 * a temporary virtual register
 * or to a constant value
 * Temporary registers get mapped to physical register during RA
 * Constant values are inlined into the instruction sequence.
 */
class Operand final
{
public:
   /* Default: an undefined operand, fixed to slot 128 (the hardware
    * encoding of inline constant 0). */
   constexpr Operand()
      : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
        isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}

   /* Operand referring to temporary 'r'. An id of 0 produces an
    * undefined operand instead. */
   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* 32-bit constant. Values with a hardware inline-constant encoding
    * are fixed to the matching scalar source slot (128..248); anything
    * else becomes a 32-bit literal (slot 255). */
   explicit Operand(uint32_t v) noexcept
   {
      data_.i = v;
      isConstant_ = true;
      if (v <= 64)
         setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         /* unsigned wrap-around: 192 - v == 192 + (-v), i.e. slots 193..208 */
         setFixed(PhysReg{192 - v});
      else if (v == 0x3f000000) /* 0.5 */
         setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         setFixed(PhysReg{247});
      else if (v == 0x3e22f983) /* 1/(2*PI) */
         setFixed(PhysReg{248});
      else /* Literal Constant */
         setFixed(PhysReg{255});
   };
   /* 64-bit constant. Only inline-encodable values are representable;
    * any other value triggers the assert below because 64-bit literals
    * do not exist in the encoding. */
   explicit Operand(uint64_t v) noexcept
   {
      isConstant_ = true;
      is64BitConst_ = true;
      if (v <= 64)
         setFixed(PhysReg{128 + (uint32_t) v});
      else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
         setFixed(PhysReg{192 - (uint32_t) v});
      else if (v == 0x3FE0000000000000) /* 0.5 */
         setFixed(PhysReg{240});
      else if (v == 0xBFE0000000000000) /* -0.5 */
         setFixed(PhysReg{241});
      else if (v == 0x3FF0000000000000) /* 1.0 */
         setFixed(PhysReg{242});
      else if (v == 0xBFF0000000000000) /* -1.0 */
         setFixed(PhysReg{243});
      else if (v == 0x4000000000000000) /* 2.0 */
         setFixed(PhysReg{244});
      else if (v == 0xC000000000000000) /* -2.0 */
         setFixed(PhysReg{245});
      else if (v == 0x4010000000000000) /* 4.0 */
         setFixed(PhysReg{246});
      else if (v == 0xC010000000000000) /* -4.0 */
         setFixed(PhysReg{247});
      else if (v == 0x3fc45f306dc9c882) /* 1/(2*PI) */
         setFixed(PhysReg{248});
      else { /* Literal Constant: we don't know if it is a long or double.*/
         isConstant_ = 0;
         assert(false && "attempt to create a 64-bit literal constant");
      }
   };
   /* Undefined operand of a given register class. */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* Operand fixed to a physical register without an SSA temporary
    * (id 0), e.g. for reading exec or scc directly. */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   constexpr bool isTemp() const noexcept
   {
      return isTemp_;
   }

   constexpr void setTemp(Temp t) noexcept {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept
   {
      return data_.temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return data_.temp.id();
   }

   /* undefined operands also carry a register class (see Operand(RegClass)) */
   constexpr bool hasRegClass() const noexcept
   {
      return isTemp() || isUndefined();
   }

   constexpr RegClass regClass() const noexcept
   {
      return data_.temp.regClass();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return is64BitConst_ ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   /* reg == unsigned(-1) acts as a sentinel meaning "not fixed" */
   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept
   {
      return isConstant_;
   }

   /* a constant without inline encoding, emitted as an extra dword (slot 255) */
   constexpr bool isLiteral() const noexcept
   {
      return isConstant() && reg_ == 255;
   }

   constexpr bool isUndefined() const noexcept
   {
      return isUndef_;
   }

   constexpr uint32_t constantValue() const noexcept
   {
      return data_.i;
   }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   /* clearing the kill flag also clears first-kill */
   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_ || isFirstKill();
   }

   /* setting first-kill implies kill */
   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept
   {
      return isFirstKill_;
   }

private:
   union {
      uint32_t i;
      float f;
      Temp temp = Temp(0, s1);
   } data_;
   PhysReg reg_;
   union {
      struct {
         uint8_t isTemp_:1;
         uint8_t isFixed_:1;
         uint8_t isConstant_:1;
         uint8_t isKill_:1;
         uint8_t isUndef_:1;
         uint8_t isFirstKill_:1;
         uint8_t is64BitConst_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
453
/**
 * Definition Class
 * Definitions are the results of Instructions
 * and refer to temporary virtual registers
 * which are later mapped to physical registers
 */
class Definition final
{
public:
   constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
   /* definition of a fresh temporary with the given SSA id */
   Definition(uint32_t index, RegClass type) noexcept
      : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept
      : temp(tmp) {}
   /* definition fixed to a physical register without an SSA temporary (id 0) */
   Definition(PhysReg reg, RegClass type) noexcept
      : temp(Temp(0, type))
   {
      setFixed(reg);
   }
   /* definition of temporary 'tmpId' already assigned to 'reg' */
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
      : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   /* id 0 marks a non-temporary (fixed-register-only) definition */
   constexpr bool isTemp() const noexcept
   {
      return tempId() > 0;
   }

   constexpr Temp getTemp() const noexcept
   {
      return temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return temp.id();
   }

   constexpr void setTemp(Temp t) noexcept {
      temp = t;
   }

   constexpr RegClass regClass() const noexcept
   {
      return temp.regClass();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      return temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   /* a hint shares reg_ with a fixed assignment — a later setFixed()
    * overwrites the hinted register */
   constexpr void setHint(PhysReg reg) noexcept
   {
      hasHint_ = 1;
      reg_ = reg;
   }

   constexpr bool hasHint() const noexcept
   {
      return hasHint_;
   }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_;
   }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_:1;
         uint8_t hasHint_:1;
         uint8_t isKill_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
558
class Block;

/* Base of all instruction types: opcode, encoding format and the
 * operand/definition arrays, which live in the same allocation as the
 * instruction itself (see create_instruction()). */
struct Instruction {
   aco_opcode opcode;
   Format format;

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   /* Tests the per-format encoding bits, so combined formats
    * (e.g. VOP2 | SDWA) still report true. */
   constexpr bool isVALU() const noexcept
   {
      return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
          || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
          || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
          || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
          || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
          || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
   }

   /* scalar ALU formats are plain values, so exact comparison suffices */
   constexpr bool isSALU() const noexcept
   {
      return format == Format::SOP1 ||
             format == Format::SOP2 ||
             format == Format::SOPC ||
             format == Format::SOPK ||
             format == Format::SOPP;
   }

   /* buffer/image memory (does not include FLAT/GLOBAL/SCRATCH) */
   constexpr bool isVMEM() const noexcept
   {
      return format == Format::MTBUF ||
             format == Format::MUBUF ||
             format == Format::MIMG;
   }

   constexpr bool isDPP() const noexcept
   {
      return (uint16_t) format & (uint16_t) Format::DPP;
   }

   /* VOP3A and VOP3B share the same bit, so both branches test bit 11 */
   constexpr bool isVOP3() const noexcept
   {
      return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
             ((uint16_t) format & (uint16_t) Format::VOP3B) ||
             format == Format::VOP3P;
   }

   constexpr bool isSDWA() const noexcept
   {
      return (uint16_t) format & (uint16_t) Format::SDWA;
   }

   constexpr bool isFlatOrGlobal() const noexcept
   {
      return format == Format::FLAT || format == Format::GLOBAL;
   }
};
616
/* SOPK: scalar ALU with a 16-bit immediate in the encoding */
struct SOPK_instruction : public Instruction {
   uint16_t imm;
};
620
/* SOPP: scalar program control (branches, waitcnt, ...) */
struct SOPP_instruction : public Instruction {
   uint32_t imm;
   int block; /* block index for branches; presumably -1 when unused — confirm at call sites */
};
625
/* remaining scalar ALU formats carry no extra fields beyond Instruction */
struct SOPC_instruction : public Instruction {
};

struct SOP1_instruction : public Instruction {
};

struct SOP2_instruction : public Instruction {
};
634
/**
 * Scalar Memory Format:
 * For s_(buffer_)load_dword*:
 * Operand(0): SBASE - SGPR-pair which provides base address
 * Operand(1): Offset - immediate (un)signed offset or SGPR
 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 * Operand(n-1): SOffset - SGPR offset (Vega only)
 *
 * Having no operands is also valid for instructions such as s_dcache_inv.
 *
 */
struct SMEM_instruction : public Instruction {
   bool glc; /* VI+: globally coherent */
   bool dlc; /* NAVI: device level coherent */
   bool nv; /* VEGA only: Non-volatile */
   bool can_reorder; /* whether this access may be reordered against others — see barrier */
   bool disable_wqm; /* require an exec mask without helper invocations */
   barrier_interaction barrier;
};
654
/* basic vector ALU formats carry no extra fields beyond Instruction */
struct VOP1_instruction : public Instruction {
};

struct VOP2_instruction : public Instruction {
};

struct VOPC_instruction : public Instruction {
};
663
/* VOP3A: vector ALU with up to three sources and modifiers */
struct VOP3A_instruction : public Instruction {
   bool abs[3];   /* per-source absolute-value input modifier */
   bool opsel[3]; /* per-source operand select — assumed to pick 16-bit halves; confirm */
   bool clamp;    /* clamp output */
   unsigned omod; /* output modifier field */
   bool neg[3];   /* per-source negate input modifier */
};
671
/**
 * Data Parallel Primitives Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 * The swizzle applies to the src0 operand.
 *
 */
struct DPP_instruction : public Instruction {
   uint16_t dpp_ctrl;  /* hardware DPP control (swizzle) field */
   uint8_t row_mask;
   uint8_t bank_mask;
   bool abs[2]; /* per-source absolute-value modifier */
   bool neg[2]; /* per-source negate modifier */
   bool bound_ctrl;
};
686
/* VINTRP: vector parameter interpolation */
struct Interp_instruction : public Instruction {
   unsigned attribute; /* attribute index to interpolate */
   unsigned component; /* component within the attribute */
};
691
692 /**
693 * Local and Global Data Sharing instructions
694 * Operand(0): ADDR - VGPR which supplies the address.
695 * Operand(1): DATA0 - First data VGPR.
696 * Operand(2): DATA1 - Second data VGPR.
697 * Operand(n-1): M0 - LDS size.
698 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
699 *
700 */
701 struct DS_instruction : public Instruction {
702 int16_t offset0;
703 int8_t offset1;
704 bool gds;
705 };
706
/**
 * Vector Memory Untyped-buffer Instructions
 * Operand(0): VADDR - Address source. Can carry an index and/or offset
 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MUBUF_instruction : public Instruction {
   unsigned offset; /* Unsigned byte offset - 12 bit */
   bool offen; /* Supply an offset from VGPR (VADDR) */
   bool idxen; /* Supply an index from VGPR (VADDR) */
   bool glc; /* globally coherent */
   bool dlc; /* NAVI: device level coherent */
   bool slc; /* system level coherent */
   bool tfe; /* texture fail enable */
   bool lds; /* Return read-data to LDS instead of VGPRs */
   bool disable_wqm; /* Require an exec mask without helper invocations */
   bool can_reorder; /* whether this access may be reordered against others — see barrier */
   barrier_interaction barrier;
};
728
/**
 * Vector Memory Typed-buffer Instructions
 * Operand(0): VADDR - Address source. Can carry an index and/or offset
 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MTBUF_instruction : public Instruction {
   /* pre-GFX10 split dfmt/nfmt fields alias the GFX10 combined format byte */
   union {
      struct {
         uint8_t dfmt : 4; /* Data Format of data in memory buffer */
         uint8_t nfmt : 3; /* Numeric format of data in memory */
      };
      uint8_t img_format; /* Buffer or image format as used by GFX10 */
   };
   unsigned offset; /* Unsigned byte offset - 12 bit */
   bool offen; /* Supply an offset from VGPR (VADDR) */
   bool idxen; /* Supply an index from VGPR (VADDR) */
   bool glc; /* globally coherent */
   bool dlc; /* NAVI: device level coherent */
   bool slc; /* system level coherent */
   bool tfe; /* texture fail enable */
   bool disable_wqm; /* Require an exec mask without helper invocations */
   bool can_reorder; /* whether this access may be reordered against others — see barrier */
   barrier_interaction barrier;
};
756
/**
 * Vector Memory Image Instructions
 * Operand(0): VADDR - Address source. Can carry an offset or an index.
 * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
 * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
 * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
 *
 */
struct MIMG_instruction : public Instruction {
   unsigned dmask; /* Data VGPR enable mask */
   bool unrm; /* Force address to be un-normalized */
   bool dlc; /* NAVI: device level coherent */
   bool glc; /* globally coherent */
   bool slc; /* system level coherent */
   bool tfe; /* texture fail enable */
   bool da; /* declare an array */
   bool lwe; /* LOD warning enable (previous comment was a copy-paste of unrm's) */
   bool r128; /* NAVI: Texture resource size */
   bool a16; /* VEGA, NAVI: Address components are 16-bits */
   bool d16; /* Convert 32-bit data to 16-bit data */
   bool disable_wqm; /* Require an exec mask without helper invocations */
   bool can_reorder; /* whether this access may be reordered against others — see barrier */
   barrier_interaction barrier;
};
781
/**
 * Flat/Scratch/Global Instructions
 * Operand(0): ADDR
 * Operand(1): SADDR
 * Operand(2) / Definition(0): DATA/VDST
 *
 */
struct FLAT_instruction : public Instruction {
   uint16_t offset; /* Vega only */
   bool slc; /* system level coherent */
   bool glc; /* globally coherent */
   bool lds; /* return read-data to LDS instead of VGPRs */
   bool nv;  /* non-volatile */
};
796
/* EXP: export to render targets / position / parameters */
struct Export_instruction : public Instruction {
   unsigned enabled_mask; /* which components are written */
   unsigned dest;         /* export target */
   bool compressed;       /* data is 16-bit compressed */
   bool done;             /* last export of the shader */
   bool valid_mask;
};
804
/* generic pseudo instruction (lowered in lower_to_hw_instr()) */
struct Pseudo_instruction : public Instruction {
   bool tmp_in_scc;
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
};
809
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];
};
817
/* pseudo barrier; carries no fields beyond Instruction */
struct Pseudo_barrier_instruction : public Instruction {
};
820
/* Reduction operations for Pseudo_reduction_instruction,
 * each in a 32-bit and a 64-bit variant. */
enum ReduceOp {
   iadd32, iadd64,
   imul32, imul64,
   fadd32, fadd64,
   fmul32, fmul64,
   imin32, imin64,
   imax32, imax64,
   umin32, umin64,
   umax32, umax64,
   fmin32, fmin64,
   fmax32, fmax64,
   iand32, iand64,
   ior32, ior64,
   ixor32, ixor64,
};
836
/**
 * Subgroup Reduction Instructions, everything except for the data to be
 * reduced and the result as inserted by setup_reduce_temp().
 * Operand(0): data to be reduced
 * Operand(1): reduce temporary
 * Operand(2): vector temporary
 * Definition(0): result
 * Definition(1): scalar temporary
 * Definition(2): scalar identity temporary
 * Definition(3): scc clobber
 * Definition(4): vcc clobber
 *
 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   unsigned cluster_size; // must be 0 for scans
};
854
/* Instructions are allocated with calloc (see create_instruction()), so
 * they must be released with free() rather than delete — no destructor
 * is run. */
struct instr_deleter_functor {
   void operator()(void* p) {
      free(p);
   }
};

/* owning pointer for instructions allocated by create_instruction() */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
863
/* Allocates a zero-initialized instruction of type T together with its
 * operand and definition arrays in one calloc'd block; the arrays are
 * placed directly behind the T object so a single free() (see
 * instr_deleter_functor) releases everything.
 * NOTE(review): neither constructors nor destructors ever run for T or
 * the array elements — this assumes all instruction types are trivial
 * enough that zero-initialization is a valid state; confirm. */
template<typename T>
T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
{
   std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   char *data = (char*) calloc(1, size);
   T* inst = (T*) data;

   inst->opcode = opcode;
   inst->format = format;

   /* operands first, definitions directly after them */
   inst->operands = aco::span<Operand>((Operand*)(data + sizeof(T)), num_operands);
   inst->definitions = aco::span<Definition>((Definition*)inst->operands.end(), num_definitions);

   return inst;
}
879
880 constexpr bool is_phi(Instruction* instr)
881 {
882 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
883 }
884
885 static inline bool is_phi(aco_ptr<Instruction>& instr)
886 {
887 return is_phi(instr.get());
888 }
889
/* Returns the memory-hazard class(es) of an instruction: formats that
 * carry an explicit barrier field report it, FLAT/GLOBAL count as
 * buffer accesses, DS as shared-memory accesses, everything else as
 * none. */
constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
{
   switch (instr->format) {
   case Format::SMEM:
      return static_cast<SMEM_instruction*>(instr)->barrier;
   case Format::MUBUF:
      return static_cast<MUBUF_instruction*>(instr)->barrier;
   case Format::MIMG:
      return static_cast<MIMG_instruction*>(instr)->barrier;
   case Format::FLAT:
   case Format::GLOBAL:
      return barrier_buffer;
   case Format::DS:
      return barrier_shared;
   default:
      return barrier_none;
   }
}
908
/* Flags describing the role of a basic block in the CFG; a mask of
 * these is stored in Block::kind. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,
   block_kind_merge = 1 << 10,
   block_kind_invert = 1 << 11,
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
};
927
928
929 struct RegisterDemand {
930 constexpr RegisterDemand() = default;
931 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
932 : vgpr{v}, sgpr{s} {}
933 int16_t vgpr = 0;
934 int16_t sgpr = 0;
935
936 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
937 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
938 }
939
940 constexpr bool exceeds(const RegisterDemand other) const noexcept {
941 return vgpr > other.vgpr || sgpr > other.sgpr;
942 }
943
944 constexpr RegisterDemand operator+(const Temp t) const noexcept {
945 if (t.type() == RegType::sgpr)
946 return RegisterDemand( vgpr, sgpr + t.size() );
947 else
948 return RegisterDemand( vgpr + t.size(), sgpr );
949 }
950
951 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
952 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
953 }
954
955 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
956 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
957 }
958
959 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
960 vgpr += other.vgpr;
961 sgpr += other.sgpr;
962 return *this;
963 }
964
965 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
966 vgpr -= other.vgpr;
967 sgpr -= other.sgpr;
968 return *this;
969 }
970
971 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
972 if (t.type() == RegType::sgpr)
973 sgpr += t.size();
974 else
975 vgpr += t.size();
976 return *this;
977 }
978
979 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
980 if (t.type() == RegType::sgpr)
981 sgpr -= t.size();
982 else
983 vgpr -= t.size();
984 return *this;
985 }
986
987 constexpr void update(const RegisterDemand other) noexcept {
988 vgpr = std::max(vgpr, other.vgpr);
989 sgpr = std::max(sgpr, other.sgpr);
990 }
991
992 };
993
/* CFG */
/* A basic block: its instructions plus predecessor/successor edges in
 * both the logical (per-thread) and linear (whole-wave) CFGs. */
struct Block {
   unsigned index;
   unsigned offset = 0; /* presumably the code offset, filled during assembly — confirm */
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds;
   std::vector<unsigned> linear_preds;
   std::vector<unsigned> logical_succs;
   std::vector<unsigned> linear_succs;
   RegisterDemand register_demand = RegisterDemand();
   uint16_t loop_nest_depth = 0;
   uint16_t kind = 0;          /* mask of block_kind flags */
   int logical_idom = -1;      /* immediate dominator block index; -1 = not computed */
   int linear_idom = -1;
   Temp live_out_exec = Temp();

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;
   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */

   Block(unsigned idx) : index(idx) {}
   Block() : index(0) {}
};
1018
/* A Stage is an OR of one-or-more software stages (bits 0-5, the APIs'
 * view) with exactly one hardware stage (bits 6-12, what the chip runs),
 * since several API stages can be merged onto one hardware stage. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_mask = 0x3f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 6;
static constexpr Stage hw_es = 1 << 7;
static constexpr Stage hw_gs = 1 << 8; /* not on GFX9. combined into ES on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_ls = 1 << 9;
static constexpr Stage hw_hs = 1 << 10; /* not on GFX9. combined into LS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_fs = 1 << 11;
static constexpr Stage hw_cs = 1 << 12;
static constexpr Stage hw_mask = 0x7f << 6;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_es = sw_vs | sw_gs | hw_es;
static constexpr Stage vertex_tess_control_ls = sw_vs | sw_tcs | hw_ls;
static constexpr Stage tess_eval_geometry_es = sw_tes | sw_gs | hw_es;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
static constexpr Stage tess_eval_es = sw_tes | hw_gs; /* tesselation evaluation before GS */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1059
/* The whole shader program: its blocks plus global compilation state
 * shared by the passes declared at the end of this header. */
class Program final {
public:
   std::vector<Block> blocks;
   RegisterDemand max_reg_demand = RegisterDemand();
   uint16_t sgpr_limit = 0;
   uint16_t num_waves = 0;
   ac_shader_config* config;
   struct radv_shader_info *info;
   enum chip_class chip_class;
   enum radeon_family family;
   Stage stage; /* Stage */
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false; /* there exists a p_wqm instruction */
   bool wb_smem_l1_on_end = false;

   std::vector<uint8_t> constant_data;

   /* Returns a fresh SSA id. Ids must fit Temp's 24-bit id field,
    * hence the 16777215 cap. */
   uint32_t allocateId()
   {
      assert(allocationID <= 16777215);
      return allocationID++;
   }

   /* next id that allocateId() would return, without consuming it */
   uint32_t peekAllocationId()
   {
      return allocationID;
   }

   void setAllocationId(uint32_t id)
   {
      allocationID = id;
   }

   /* appends a new empty block; its index is its position in 'blocks' */
   Block* create_and_insert_block() {
      blocks.emplace_back(blocks.size());
      return &blocks.back();
   }

   /* appends an existing block, re-indexing it to its new position */
   Block* insert_block(Block&& block) {
      block.index = blocks.size();
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   uint32_t allocationID = 1; /* id 0 is reserved for "no temporary" */
};
1107
/* Result of live_var_analysis(), indexed by block. */
struct live {
   /* live temps out per block */
   std::vector<std::set<Temp>> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
1114
/* Entry points of the individual compiler passes; presumably each is
 * implemented in its own aco_*.cpp — confirm against the build files. */
void select_program(Program *program,
                    unsigned shader_count,
                    struct nir_shader *const *shaders,
                    ac_shader_config* config,
                    struct radv_shader_info *info,
                    struct radv_nir_compiler_options *options);

void lower_wqm(Program* program, live& live_vars,
               const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);
void value_numbering(Program* program);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary, unsigned exec_size,
               enum radeon_family family, std::ostream& out);
void validate(Program* program, FILE *output);
bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
/* NOTE(review): the debug function takes (cond, msg, instr) but the
 * NDEBUG macro swallows (program, cond, msg, ...) — call sites cannot
 * satisfy both arities; confirm which signature is intended. */
#ifndef NDEBUG
void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#else
#define perfwarn(program, cond, msg, ...)
#endif

void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);
1154
/* Static per-opcode property tables: hardware opcode numbers for GFX9
 * and GFX10, which opcodes accept input/output modifiers, a printable
 * name and the encoding Format. The single instance is instr_info. */
typedef struct {
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const char *name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;

extern const Info instr_info;
1165
1166 }
1167
1168 #endif /* ACO_IR_H */
1169