aco: Add missing GFX10 specific fields and some README notes.
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_info;
41
42 namespace aco {
43
/* Global debug switches; declared here, defined in another translation unit.
 * Presumably a bitwise OR of the DEBUG_* values below. */
extern uint64_t debug_flags;

/* One bit per debug feature. */
enum {
   DEBUG_VALIDATE = 1 << 0,
   DEBUG_VALIDATE_RA = 1 << 1,
   DEBUG_PERFWARN = 1 << 2,
};
51
/**
 * Microcode encoding format of an instruction.
 *
 * Values below 0x100 are plain enumerators (one format per instruction).
 * Values from 0x100 upwards are single bits, so Vector ALU formats can be
 * combined:
 * - VOP2* | VOP3A is a VOP2 instruction in VOP3A encoding
 * - VOP2* | DPP is a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA is a VOP2 instruction with sub-dword addressing.
 *
 * (*) The same is applicable for VOP1 and VOPC instructions.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,

   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,

   /* Scalar Memory Format */
   SMEM = 6,

   /* (the value 7 is not assigned) */

   /* LDS/GDS Format */
   DS = 8,

   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,

   /* Vector Memory Image Format */
   MIMG = 11,

   /* Export Format */
   EXP = 12,

   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   /* Further pseudo formats */
   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats (bit flags); VOP3, VOP3A and VOP3B share one bit */
   VOP1 = 0x0100,
   VOP2 = 0x0200,
   VOPC = 0x0400,
   VOP3 = 0x0800,
   VOP3A = 0x0800,
   VOP3B = 0x0800,
   VOP3P = 0x1000,

   /* Vector Parameter Interpolation Format */
   VINTRP = 0x2000,

   /* Modifiers that are OR-ed onto VOP1/VOP2/VOPC */
   DPP = 0x4000,
   SDWA = 0x8000,
};
103
/* Bit flags naming the kinds of memory an instruction or barrier interacts with. */
enum barrier_interaction {
   barrier_none = 0,
   barrier_buffer = 1 << 0,
   barrier_image = 1 << 1,
   barrier_atomic = 1 << 2,
   barrier_shared = 1 << 3,
   /* number of flag bits defined above, not a flag itself */
   barrier_count = 4,
};
112
113 constexpr Format asVOP3(Format format) {
114 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
115 };
116
/* Kind of register file a value lives in. */
enum class RegType {
   none = 0,
   sgpr = 1,
   vgpr = 2,
   linear_vgpr = 3,
};
123
124 struct RegClass {
125
126 enum RC : uint8_t {
127 s1 = 1,
128 s2 = 2,
129 s3 = 3,
130 s4 = 4,
131 s6 = 6,
132 s8 = 8,
133 s16 = 16,
134 v1 = s1 | (1 << 5),
135 v2 = s2 | (1 << 5),
136 v3 = s3 | (1 << 5),
137 v4 = s4 | (1 << 5),
138 v5 = 5 | (1 << 5),
139 v6 = 6 | (1 << 5),
140 v7 = 7 | (1 << 5),
141 v8 = 8 | (1 << 5),
142 /* these are used for WWM and spills to vgpr */
143 v1_linear = v1 | (1 << 6),
144 v2_linear = v2 | (1 << 6),
145 };
146
147 RegClass() = default;
148 constexpr RegClass(RC rc)
149 : rc(rc) {}
150 constexpr RegClass(RegType type, unsigned size)
151 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
152
153 constexpr operator RC() const { return rc; }
154 explicit operator bool() = delete;
155
156 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
157 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
158 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
159 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
160
161 private:
162 RC rc;
163 };
164
165 /* transitional helper expressions */
166 static constexpr RegClass s1{RegClass::s1};
167 static constexpr RegClass s2{RegClass::s2};
168 static constexpr RegClass s3{RegClass::s3};
169 static constexpr RegClass s4{RegClass::s4};
170 static constexpr RegClass s8{RegClass::s8};
171 static constexpr RegClass s16{RegClass::s16};
172 static constexpr RegClass v1{RegClass::v1};
173 static constexpr RegClass v2{RegClass::v2};
174 static constexpr RegClass v3{RegClass::v3};
175 static constexpr RegClass v4{RegClass::v4};
176 static constexpr RegClass v5{RegClass::v5};
177 static constexpr RegClass v6{RegClass::v6};
178 static constexpr RegClass v7{RegClass::v7};
179 static constexpr RegClass v8{RegClass::v8};
180
181 /**
182 * Temp Class
183 * Each temporary virtual register has a
184 * register class (i.e. size and type)
185 * and SSA id.
186 */
187 struct Temp {
188 Temp() = default;
189 constexpr Temp(uint32_t id, RegClass cls) noexcept
190 : id_(id), reg_class(cls) {}
191
192 constexpr uint32_t id() const noexcept { return id_; }
193 constexpr RegClass regClass() const noexcept { return reg_class; }
194
195 constexpr unsigned size() const noexcept { return reg_class.size(); }
196 constexpr RegType type() const noexcept { return reg_class.type(); }
197 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
198
199 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
200 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
201 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
202
203 private:
204 uint32_t id_:24;
205 RegClass reg_class;
206 };
207
/**
 * PhysReg
 * Physical register index used by Operands and Definitions.
 * Stored in 16 bits; converts implicitly to unsigned.
 */
struct PhysReg {
   uint16_t reg = 0;

   constexpr PhysReg() = default;
   /* The argument is truncated to 16 bits. */
   explicit constexpr PhysReg(unsigned r) : reg(r) {}

   constexpr operator unsigned() const { return static_cast<unsigned>(reg); }
};
220
221 /* helper expressions for special registers */
222 static constexpr PhysReg m0{124};
223 static constexpr PhysReg vcc{106};
224 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
225 static constexpr PhysReg exec{126};
226 static constexpr PhysReg exec_lo{126};
227 static constexpr PhysReg exec_hi{127};
228 static constexpr PhysReg scc{253};
229
230 /**
231 * Operand Class
232 * Initially, each Operand refers to either
233 * a temporary virtual register
234 * or to a constant value
235 * Temporary registers get mapped to physical register during RA
236 * Constant values are inlined into the instruction sequence.
237 */
238 class Operand final
239 {
240 public:
241 constexpr Operand()
242 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
243 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
244
245 explicit Operand(Temp r) noexcept
246 {
247 data_.temp = r;
248 if (r.id()) {
249 isTemp_ = true;
250 } else {
251 isUndef_ = true;
252 setFixed(PhysReg{128});
253 }
254 };
255 explicit Operand(uint32_t v) noexcept
256 {
257 data_.i = v;
258 isConstant_ = true;
259 if (v <= 64)
260 setFixed(PhysReg{128 + v});
261 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
262 setFixed(PhysReg{192 - v});
263 else if (v == 0x3f000000) /* 0.5 */
264 setFixed(PhysReg{240});
265 else if (v == 0xbf000000) /* -0.5 */
266 setFixed(PhysReg{241});
267 else if (v == 0x3f800000) /* 1.0 */
268 setFixed(PhysReg{242});
269 else if (v == 0xbf800000) /* -1.0 */
270 setFixed(PhysReg{243});
271 else if (v == 0x40000000) /* 2.0 */
272 setFixed(PhysReg{244});
273 else if (v == 0xc0000000) /* -2.0 */
274 setFixed(PhysReg{245});
275 else if (v == 0x40800000) /* 4.0 */
276 setFixed(PhysReg{246});
277 else if (v == 0xc0800000) /* -4.0 */
278 setFixed(PhysReg{247});
279 else if (v == 0x3e22f983) /* 1/(2*PI) */
280 setFixed(PhysReg{248});
281 else /* Literal Constant */
282 setFixed(PhysReg{255});
283 };
284 explicit Operand(uint64_t v) noexcept
285 {
286 isConstant_ = true;
287 is64BitConst_ = true;
288 if (v <= 64)
289 setFixed(PhysReg{128 + (uint32_t) v});
290 else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
291 setFixed(PhysReg{192 - (uint32_t) v});
292 else if (v == 0x3FE0000000000000) /* 0.5 */
293 setFixed(PhysReg{240});
294 else if (v == 0xBFE0000000000000) /* -0.5 */
295 setFixed(PhysReg{241});
296 else if (v == 0x3FF0000000000000) /* 1.0 */
297 setFixed(PhysReg{242});
298 else if (v == 0xBFF0000000000000) /* -1.0 */
299 setFixed(PhysReg{243});
300 else if (v == 0x4000000000000000) /* 2.0 */
301 setFixed(PhysReg{244});
302 else if (v == 0xC000000000000000) /* -2.0 */
303 setFixed(PhysReg{245});
304 else if (v == 0x4010000000000000) /* 4.0 */
305 setFixed(PhysReg{246});
306 else if (v == 0xC010000000000000) /* -4.0 */
307 setFixed(PhysReg{247});
308 else if (v == 0x3fc45f306dc9c882) /* 1/(2*PI) */
309 setFixed(PhysReg{248});
310 else { /* Literal Constant: we don't know if it is a long or double.*/
311 isConstant_ = 0;
312 assert(false && "attempt to create a 64-bit literal constant");
313 }
314 };
315 explicit Operand(RegClass type) noexcept
316 {
317 isUndef_ = true;
318 data_.temp = Temp(0, type);
319 setFixed(PhysReg{128});
320 };
321 explicit Operand(PhysReg reg, RegClass type) noexcept
322 {
323 data_.temp = Temp(0, type);
324 setFixed(reg);
325 }
326
327 constexpr bool isTemp() const noexcept
328 {
329 return isTemp_;
330 }
331
332 constexpr void setTemp(Temp t) noexcept {
333 assert(!isConstant_);
334 isTemp_ = true;
335 data_.temp = t;
336 }
337
338 constexpr Temp getTemp() const noexcept
339 {
340 return data_.temp;
341 }
342
343 constexpr uint32_t tempId() const noexcept
344 {
345 return data_.temp.id();
346 }
347
348 constexpr bool hasRegClass() const noexcept
349 {
350 return isTemp() || isUndefined();
351 }
352
353 constexpr RegClass regClass() const noexcept
354 {
355 return data_.temp.regClass();
356 }
357
358 constexpr unsigned size() const noexcept
359 {
360 if (isConstant())
361 return is64BitConst_ ? 2 : 1;
362 else
363 return data_.temp.size();
364 }
365
366 constexpr bool isFixed() const noexcept
367 {
368 return isFixed_;
369 }
370
371 constexpr PhysReg physReg() const noexcept
372 {
373 return reg_;
374 }
375
376 constexpr void setFixed(PhysReg reg) noexcept
377 {
378 isFixed_ = reg != unsigned(-1);
379 reg_ = reg;
380 }
381
382 constexpr bool isConstant() const noexcept
383 {
384 return isConstant_;
385 }
386
387 constexpr bool isLiteral() const noexcept
388 {
389 return isConstant() && reg_ == 255;
390 }
391
392 constexpr bool isUndefined() const noexcept
393 {
394 return isUndef_;
395 }
396
397 constexpr uint32_t constantValue() const noexcept
398 {
399 return data_.i;
400 }
401
402 constexpr bool constantEquals(uint32_t cmp) const noexcept
403 {
404 return isConstant() && constantValue() == cmp;
405 }
406
407 constexpr void setKill(bool flag) noexcept
408 {
409 isKill_ = flag;
410 if (!flag)
411 setFirstKill(false);
412 }
413
414 constexpr bool isKill() const noexcept
415 {
416 return isKill_ || isFirstKill();
417 }
418
419 constexpr void setFirstKill(bool flag) noexcept
420 {
421 isFirstKill_ = flag;
422 if (flag)
423 setKill(flag);
424 }
425
426 /* When there are multiple operands killing the same temporary,
427 * isFirstKill() is only returns true for the first one. */
428 constexpr bool isFirstKill() const noexcept
429 {
430 return isFirstKill_;
431 }
432
433 private:
434 union {
435 uint32_t i;
436 float f;
437 Temp temp = Temp(0, s1);
438 } data_;
439 PhysReg reg_;
440 union {
441 struct {
442 uint8_t isTemp_:1;
443 uint8_t isFixed_:1;
444 uint8_t isConstant_:1;
445 uint8_t isKill_:1;
446 uint8_t isUndef_:1;
447 uint8_t isFirstKill_:1;
448 uint8_t is64BitConst_:1;
449 };
450 /* can't initialize bit-fields in c++11, so work around using a union */
451 uint8_t control_ = 0;
452 };
453 };
454
455 /**
456 * Definition Class
457 * Definitions are the results of Instructions
458 * and refer to temporary virtual registers
459 * which are later mapped to physical registers
460 */
461 class Definition final
462 {
463 public:
464 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
465 Definition(uint32_t index, RegClass type) noexcept
466 : temp(index, type) {}
467 explicit Definition(Temp tmp) noexcept
468 : temp(tmp) {}
469 Definition(PhysReg reg, RegClass type) noexcept
470 : temp(Temp(0, type))
471 {
472 setFixed(reg);
473 }
474 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
475 : temp(Temp(tmpId, type))
476 {
477 setFixed(reg);
478 }
479
480 constexpr bool isTemp() const noexcept
481 {
482 return tempId() > 0;
483 }
484
485 constexpr Temp getTemp() const noexcept
486 {
487 return temp;
488 }
489
490 constexpr uint32_t tempId() const noexcept
491 {
492 return temp.id();
493 }
494
495 constexpr void setTemp(Temp t) noexcept {
496 temp = t;
497 }
498
499 constexpr RegClass regClass() const noexcept
500 {
501 return temp.regClass();
502 }
503
504 constexpr unsigned size() const noexcept
505 {
506 return temp.size();
507 }
508
509 constexpr bool isFixed() const noexcept
510 {
511 return isFixed_;
512 }
513
514 constexpr PhysReg physReg() const noexcept
515 {
516 return reg_;
517 }
518
519 constexpr void setFixed(PhysReg reg) noexcept
520 {
521 isFixed_ = 1;
522 reg_ = reg;
523 }
524
525 constexpr void setHint(PhysReg reg) noexcept
526 {
527 hasHint_ = 1;
528 reg_ = reg;
529 }
530
531 constexpr bool hasHint() const noexcept
532 {
533 return hasHint_;
534 }
535
536 constexpr void setKill(bool flag) noexcept
537 {
538 isKill_ = flag;
539 }
540
541 constexpr bool isKill() const noexcept
542 {
543 return isKill_;
544 }
545
546 private:
547 Temp temp = Temp(0, s1);
548 PhysReg reg_;
549 union {
550 struct {
551 uint8_t isFixed_:1;
552 uint8_t hasHint_:1;
553 uint8_t isKill_:1;
554 };
555 /* can't initialize bit-fields in c++11, so work around using a union */
556 uint8_t control_ = 0;
557 };
558 };
559
/* Forward declaration; uses `struct` to match the definition of Block further down. */
struct Block;
561
562 struct Instruction {
563 aco_opcode opcode;
564 Format format;
565 uint32_t pass_flags;
566
567 aco::span<Operand> operands;
568 aco::span<Definition> definitions;
569
570 constexpr bool isVALU() const noexcept
571 {
572 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
573 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
574 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
575 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
576 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
577 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
578 }
579
580 constexpr bool isSALU() const noexcept
581 {
582 return format == Format::SOP1 ||
583 format == Format::SOP2 ||
584 format == Format::SOPC ||
585 format == Format::SOPK ||
586 format == Format::SOPP;
587 }
588
589 constexpr bool isVMEM() const noexcept
590 {
591 return format == Format::MTBUF ||
592 format == Format::MUBUF ||
593 format == Format::MIMG;
594 }
595
596 constexpr bool isDPP() const noexcept
597 {
598 return (uint16_t) format & (uint16_t) Format::DPP;
599 }
600
601 constexpr bool isVOP3() const noexcept
602 {
603 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
604 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
605 format == Format::VOP3P;
606 }
607
608 constexpr bool isSDWA() const noexcept
609 {
610 return (uint16_t) format & (uint16_t) Format::SDWA;
611 }
612
613 constexpr bool isFlatOrGlobal() const noexcept
614 {
615 return format == Format::FLAT || format == Format::GLOBAL;
616 }
617 };
618
619 struct SOPK_instruction : public Instruction {
620 uint16_t imm;
621 };
622
623 struct SOPP_instruction : public Instruction {
624 uint32_t imm;
625 int block;
626 };
627
628 struct SOPC_instruction : public Instruction {
629 };
630
631 struct SOP1_instruction : public Instruction {
632 };
633
634 struct SOP2_instruction : public Instruction {
635 };
636
637 /**
638 * Scalar Memory Format:
639 * For s_(buffer_)load_dword*:
640 * Operand(0): SBASE - SGPR-pair which provides base address
641 * Operand(1): Offset - immediate (un)signed offset or SGPR
642 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
643 * Operand(n-1): SOffset - SGPR offset (Vega only)
644 *
645 * Having no operands is also valid for instructions such as s_dcache_inv.
646 *
647 */
648 struct SMEM_instruction : public Instruction {
649 bool glc; /* VI+: globally coherent */
650 bool dlc; /* NAVI: device level coherent */
651 bool nv; /* VEGA only: Non-volatile */
652 bool can_reorder;
653 bool disable_wqm;
654 barrier_interaction barrier;
655 };
656
657 struct VOP1_instruction : public Instruction {
658 };
659
660 struct VOP2_instruction : public Instruction {
661 };
662
663 struct VOPC_instruction : public Instruction {
664 };
665
666 struct VOP3A_instruction : public Instruction {
667 bool abs[3];
668 bool opsel[3];
669 bool clamp;
670 unsigned omod;
671 bool neg[3];
672 };
673
674 /**
675 * Data Parallel Primitives Format:
676 * This format can be used for VOP1, VOP2 or VOPC instructions.
677 * The swizzle applies to the src0 operand.
678 *
679 */
680 struct DPP_instruction : public Instruction {
681 uint16_t dpp_ctrl;
682 uint8_t row_mask;
683 uint8_t bank_mask;
684 bool abs[2];
685 bool neg[2];
686 bool bound_ctrl;
687 };
688
689 struct Interp_instruction : public Instruction {
690 unsigned attribute;
691 unsigned component;
692 };
693
694 /**
695 * Local and Global Data Sharing instructions
696 * Operand(0): ADDR - VGPR which supplies the address.
697 * Operand(1): DATA0 - First data VGPR.
698 * Operand(2): DATA1 - Second data VGPR.
699 * Operand(n-1): M0 - LDS size.
700 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
701 *
702 */
703 struct DS_instruction : public Instruction {
704 int16_t offset0;
705 int8_t offset1;
706 bool gds;
707 };
708
709 /**
710 * Vector Memory Untyped-buffer Instructions
711 * Operand(0): VADDR - Address source. Can carry an index and/or offset
712 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
713 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
714 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
715 *
716 */
717 struct MUBUF_instruction : public Instruction {
718 unsigned offset; /* Unsigned byte offset - 12 bit */
719 bool offen; /* Supply an offset from VGPR (VADDR) */
720 bool idxen; /* Supply an index from VGPR (VADDR) */
721 bool glc; /* globally coherent */
722 bool dlc; /* NAVI: device level coherent */
723 bool slc; /* system level coherent */
724 bool tfe; /* texture fail enable */
725 bool lds; /* Return read-data to LDS instead of VGPRs */
726 bool disable_wqm; /* Require an exec mask without helper invocations */
727 bool can_reorder;
728 barrier_interaction barrier;
729 };
730
731 /**
732 * Vector Memory Typed-buffer Instructions
733 * Operand(0): VADDR - Address source. Can carry an index and/or offset
734 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
735 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
736 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
737 *
738 */
739 struct MTBUF_instruction : public Instruction {
740 union {
741 struct {
742 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
743 uint8_t nfmt : 3; /* Numeric format of data in memory */
744 };
745 uint8_t img_format; /* Buffer or image format as used by GFX10 */
746 };
747 unsigned offset; /* Unsigned byte offset - 12 bit */
748 bool offen; /* Supply an offset from VGPR (VADDR) */
749 bool idxen; /* Supply an index from VGPR (VADDR) */
750 bool glc; /* globally coherent */
751 bool dlc; /* NAVI: device level coherent */
752 bool slc; /* system level coherent */
753 bool tfe; /* texture fail enable */
754 bool disable_wqm; /* Require an exec mask without helper invocations */
755 bool can_reorder;
756 barrier_interaction barrier;
757 };
758
759 /**
760 * Vector Memory Image Instructions
761 * Operand(0): VADDR - Address source. Can carry an offset or an index.
762 * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
763 * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
764 * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
765 *
766 */
767 struct MIMG_instruction : public Instruction {
768 unsigned dmask; /* Data VGPR enable mask */
769 unsigned dim; /* NAVI: dimensionality */
770 bool unrm; /* Force address to be un-normalized */
771 bool dlc; /* NAVI: device level coherent */
772 bool glc; /* globally coherent */
773 bool slc; /* system level coherent */
774 bool tfe; /* texture fail enable */
775 bool da; /* declare an array */
776 bool lwe; /* Force data to be un-normalized */
777 bool r128; /* NAVI: Texture resource size */
778 bool a16; /* VEGA, NAVI: Address components are 16-bits */
779 bool d16; /* Convert 32-bit data to 16-bit data */
780 bool disable_wqm; /* Require an exec mask without helper invocations */
781 bool can_reorder;
782 barrier_interaction barrier;
783 };
784
785 /**
786 * Flat/Scratch/Global Instructions
787 * Operand(0): ADDR
788 * Operand(1): SADDR
789 * Operand(2) / Definition(0): DATA/VDST
790 *
791 */
792 struct FLAT_instruction : public Instruction {
793 uint16_t offset; /* Vega only */
794 bool slc; /* system level coherent */
795 bool glc; /* globally coherent */
796 bool dlc; /* NAVI: device level coherent */
797 bool lds;
798 bool nv;
799 };
800
801 struct Export_instruction : public Instruction {
802 unsigned enabled_mask;
803 unsigned dest;
804 bool compressed;
805 bool done;
806 bool valid_mask;
807 };
808
809 struct Pseudo_instruction : public Instruction {
810 bool tmp_in_scc;
811 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
812 };
813
814 struct Pseudo_branch_instruction : public Instruction {
815 /* target[0] is the block index of the branch target.
816 * For conditional branches, target[1] contains the fall-through alternative.
817 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
818 */
819 uint32_t target[2];
820 };
821
822 struct Pseudo_barrier_instruction : public Instruction {
823 };
824
/* Reduction operations. Each operation comes as a 32-bit and a 64-bit variant;
 * the 32-bit variant has the even value and the 64-bit variant follows it. */
enum ReduceOp {
   iadd32 = 0,
   iadd64 = 1,
   imul32 = 2,
   imul64 = 3,
   fadd32 = 4,
   fadd64 = 5,
   fmul32 = 6,
   fmul64 = 7,
   imin32 = 8,
   imin64 = 9,
   imax32 = 10,
   imax64 = 11,
   umin32 = 12,
   umin64 = 13,
   umax32 = 14,
   umax64 = 15,
   fmin32 = 16,
   fmin64 = 17,
   fmax32 = 18,
   fmax64 = 19,
   iand32 = 20,
   iand64 = 21,
   ior32 = 22,
   ior64 = 23,
   ixor32 = 24,
   ixor64 = 25,
};
840
841 /**
842 * Subgroup Reduction Instructions, everything except for the data to be
843 * reduced and the result as inserted by setup_reduce_temp().
844 * Operand(0): data to be reduced
845 * Operand(1): reduce temporary
846 * Operand(2): vector temporary
847 * Definition(0): result
848 * Definition(1): scalar temporary
849 * Definition(2): scalar identity temporary
850 * Definition(3): scc clobber
851 * Definition(4): vcc clobber
852 *
853 */
854 struct Pseudo_reduction_instruction : public Instruction {
855 ReduceOp reduce_op;
856 unsigned cluster_size; // must be 0 for scans
857 };
858
/* Deleter that hands the pointer to free(); pairs with the calloc() in create_instruction(). */
struct instr_deleter_functor {
   void operator()(void* ptr) {
      free(ptr);
   }
};

/* Owning pointer whose pointee is released with free() rather than delete. */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
867
868 template<typename T>
869 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
870 {
871 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
872 char *data = (char*) calloc(1, size);
873 T* inst = (T*) data;
874
875 inst->opcode = opcode;
876 inst->format = format;
877
878 inst->operands = aco::span<Operand>((Operand*)(data + sizeof(T)), num_operands);
879 inst->definitions = aco::span<Definition>((Definition*)inst->operands.end(), num_definitions);
880
881 return inst;
882 }
883
884 constexpr bool is_phi(Instruction* instr)
885 {
886 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
887 }
888
889 static inline bool is_phi(aco_ptr<Instruction>& instr)
890 {
891 return is_phi(instr.get());
892 }
893
894 constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
895 {
896 switch (instr->format) {
897 case Format::SMEM:
898 return static_cast<SMEM_instruction*>(instr)->barrier;
899 case Format::MUBUF:
900 return static_cast<MUBUF_instruction*>(instr)->barrier;
901 case Format::MIMG:
902 return static_cast<MIMG_instruction*>(instr)->barrier;
903 case Format::FLAT:
904 case Format::GLOBAL:
905 return barrier_buffer;
906 case Format::DS:
907 return barrier_shared;
908 default:
909 return barrier_none;
910 }
911 }
912
/* Bit flags describing the role of a block in the control flow graph. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 0x0001,
   block_kind_top_level = 0x0002,
   block_kind_loop_preheader = 0x0004,
   block_kind_loop_header = 0x0008,
   block_kind_loop_exit = 0x0010,
   block_kind_continue = 0x0020,
   block_kind_break = 0x0040,
   block_kind_continue_or_break = 0x0080,
   block_kind_discard = 0x0100,
   block_kind_branch = 0x0200,
   block_kind_merge = 0x0400,
   block_kind_invert = 0x0800,
   block_kind_uses_discard_if = 0x1000,
   block_kind_needs_lowering = 0x2000,
   block_kind_uses_demote = 0x4000,
};
932
933
934 struct RegisterDemand {
935 constexpr RegisterDemand() = default;
936 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
937 : vgpr{v}, sgpr{s} {}
938 int16_t vgpr = 0;
939 int16_t sgpr = 0;
940
941 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
942 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
943 }
944
945 constexpr bool exceeds(const RegisterDemand other) const noexcept {
946 return vgpr > other.vgpr || sgpr > other.sgpr;
947 }
948
949 constexpr RegisterDemand operator+(const Temp t) const noexcept {
950 if (t.type() == RegType::sgpr)
951 return RegisterDemand( vgpr, sgpr + t.size() );
952 else
953 return RegisterDemand( vgpr + t.size(), sgpr );
954 }
955
956 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
957 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
958 }
959
960 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
961 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
962 }
963
964 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
965 vgpr += other.vgpr;
966 sgpr += other.sgpr;
967 return *this;
968 }
969
970 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
971 vgpr -= other.vgpr;
972 sgpr -= other.sgpr;
973 return *this;
974 }
975
976 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
977 if (t.type() == RegType::sgpr)
978 sgpr += t.size();
979 else
980 vgpr += t.size();
981 return *this;
982 }
983
984 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
985 if (t.type() == RegType::sgpr)
986 sgpr -= t.size();
987 else
988 vgpr -= t.size();
989 return *this;
990 }
991
992 constexpr void update(const RegisterDemand other) noexcept {
993 vgpr = std::max(vgpr, other.vgpr);
994 sgpr = std::max(sgpr, other.sgpr);
995 }
996
997 };
998
999 /* CFG */
1000 struct Block {
1001 unsigned index;
1002 unsigned offset = 0;
1003 std::vector<aco_ptr<Instruction>> instructions;
1004 std::vector<unsigned> logical_preds;
1005 std::vector<unsigned> linear_preds;
1006 std::vector<unsigned> logical_succs;
1007 std::vector<unsigned> linear_succs;
1008 RegisterDemand register_demand = RegisterDemand();
1009 uint16_t loop_nest_depth = 0;
1010 uint16_t kind = 0;
1011 int logical_idom = -1;
1012 int linear_idom = -1;
1013 Temp live_out_exec = Temp();
1014
1015 /* this information is needed for predecessors to blocks with phis when
1016 * moving out of ssa */
1017 bool scc_live_out = false;
1018 PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1019
1020 Block(unsigned idx) : index(idx) {}
1021 Block() : index(0) {}
1022 };
1023
/* A shader stage is a bitmask: one or more software stage bits (sw_*) OR-ed
 * with exactly one hardware stage bit (hw_*). */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_mask = 0x3f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 6;
static constexpr Stage hw_es = 1 << 7;
static constexpr Stage hw_gs = 1 << 8; /* not on GFX9. combined into ES on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_ls = 1 << 9;
static constexpr Stage hw_hs = 1 << 10; /* not on GFX9. combined into LS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_fs = 1 << 11;
static constexpr Stage hw_cs = 1 << 12;
static constexpr Stage hw_mask = 0x7f << 6;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_es = sw_vs | sw_gs | hw_es;
static constexpr Stage vertex_tess_control_ls = sw_vs | sw_tcs | hw_ls;
static constexpr Stage tess_eval_geometry_es = sw_tes | sw_gs | hw_es;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
/* runs on the ES hardware stage, as the name says (it was built from hw_gs before) */
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before GS */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1064
1065 class Program final {
1066 public:
1067 std::vector<Block> blocks;
1068 RegisterDemand max_reg_demand = RegisterDemand();
1069 uint16_t sgpr_limit = 0;
1070 uint16_t num_waves = 0;
1071 ac_shader_config* config;
1072 struct radv_shader_info *info;
1073 enum chip_class chip_class;
1074 enum radeon_family family;
1075 unsigned wave_size;
1076 Stage stage; /* Stage */
1077 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1078 bool needs_wqm = false; /* there exists a p_wqm instruction */
1079 bool wb_smem_l1_on_end = false;
1080
1081 std::vector<uint8_t> constant_data;
1082
1083 uint32_t allocateId()
1084 {
1085 assert(allocationID <= 16777215);
1086 return allocationID++;
1087 }
1088
1089 uint32_t peekAllocationId()
1090 {
1091 return allocationID;
1092 }
1093
1094 void setAllocationId(uint32_t id)
1095 {
1096 allocationID = id;
1097 }
1098
1099 Block* create_and_insert_block() {
1100 blocks.emplace_back(blocks.size());
1101 return &blocks.back();
1102 }
1103
1104 Block* insert_block(Block&& block) {
1105 block.index = blocks.size();
1106 blocks.emplace_back(std::move(block));
1107 return &blocks.back();
1108 }
1109
1110 private:
1111 uint32_t allocationID = 1;
1112 };
1113
1114 struct live {
1115 /* live temps out per block */
1116 std::vector<std::set<Temp>> live_out;
1117 /* register demand (sgpr/vgpr) per instruction per block */
1118 std::vector<std::vector<RegisterDemand>> register_demand;
1119 };
1120
1121 void select_program(Program *program,
1122 unsigned shader_count,
1123 struct nir_shader *const *shaders,
1124 ac_shader_config* config,
1125 struct radv_shader_info *info,
1126 struct radv_nir_compiler_options *options);
1127
1128 void lower_wqm(Program* program, live& live_vars,
1129 const struct radv_nir_compiler_options *options);
1130 void lower_bool_phis(Program* program);
1131 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1132 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1133 std::vector<uint16_t> dead_code_analysis(Program *program);
1134 void dominator_tree(Program* program);
1135 void insert_exec_mask(Program *program);
1136 void value_numbering(Program* program);
1137 void optimize(Program* program);
1138 void setup_reduce_temp(Program* program);
1139 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1140 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1141 void ssa_elimination(Program* program);
1142 void lower_to_hw_instr(Program* program);
1143 void schedule_program(Program* program, live& live_vars);
1144 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1145 void insert_wait_states(Program* program);
1146 void insert_NOPs(Program* program);
1147 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1148 void print_asm(Program *program, std::vector<uint32_t>& binary,
1149 unsigned exec_size, std::ostream& out);
1150 void validate(Program* program, FILE *output);
1151 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1152 #ifndef NDEBUG
1153 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1154 #else
1155 #define perfwarn(program, cond, msg, ...)
1156 #endif
1157
1158 void aco_print_instr(Instruction *instr, FILE *output);
1159 void aco_print_program(Program *program, FILE *output);
1160
1161 typedef struct {
1162 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1163 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1164 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1165 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1166 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1167 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1168 } Info;
1169
1170 extern const Info instr_info;
1171
1172 }
1173
1174 #endif /* ACO_IR_H */
1175