/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include <unordered_set>
35 #include "ac_binary.h"
36 #include "amd_family.h"
37 #include "aco_opcodes.h"
/* Forward declarations of RADV driver types referenced by this interface;
 * their definitions live in the radv driver headers. */
struct radv_nir_compiler_options;
struct radv_shader_args;
struct radv_shader_info;
46 extern uint64_t debug_flags
;
50 DEBUG_VALIDATE_RA
= 0x2,
55 * Representation of the instruction's microcode encoding format
56 * Note: Some Vector ALU Formats can be combined, such that:
57 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
58 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
59 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
61 * (*) The same is applicable for VOP1 and VOPC instructions.
63 enum class Format
: std::uint16_t {
64 /* Pseudo Instruction Format */
66 /* Scalar ALU & Control Formats */
72 /* Scalar Memory Format */
76 /* Vector Memory Buffer Formats */
79 /* Vector Memory Image Format */
90 PSEUDO_REDUCTION
= 18,
92 /* Vector ALU Formats */
100 /* Vector Parameter Interpolation Format */
106 enum barrier_interaction
: uint8_t {
108 barrier_buffer
= 0x1,
110 barrier_atomic
= 0x4,
111 barrier_shared
= 0x8,
112 /* used for geometry shaders to ensure vertex data writes are before the
113 * GS_DONE s_sendmsg. */
114 barrier_gs_data
= 0x10,
115 /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
116 barrier_gs_sendmsg
= 0x20,
117 /* used by barriers. created by s_barrier */
118 barrier_barrier
= 0x40,
130 /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
131 * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
132 fp_denorm_flush
= 0x0,
133 fp_denorm_keep
= 0x3,
137 /* matches encoding of the MODE register */
141 fp_round round16_64
:2;
143 unsigned denorm16_64
:2;
147 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
148 bool preserve_signed_zero_inf_nan32
:1;
149 bool preserve_signed_zero_inf_nan16_64
:1;
150 /* if false, optimizations which may remove denormal flushing can be done */
151 bool must_flush_denorms32
:1;
152 bool must_flush_denorms16_64
:1;
153 bool care_about_round32
:1;
154 bool care_about_round16_64
:1;
156 /* Returns true if instructions using the mode "other" can safely use the
157 * current one instead. */
158 bool canReplace(float_mode other
) const noexcept
{
159 return val
== other
.val
&&
160 (preserve_signed_zero_inf_nan32
|| !other
.preserve_signed_zero_inf_nan32
) &&
161 (preserve_signed_zero_inf_nan16_64
|| !other
.preserve_signed_zero_inf_nan16_64
) &&
162 (must_flush_denorms32
|| !other
.must_flush_denorms32
) &&
163 (must_flush_denorms16_64
|| !other
.must_flush_denorms16_64
) &&
164 (care_about_round32
|| !other
.care_about_round32
) &&
165 (care_about_round16_64
|| !other
.care_about_round16_64
);
169 constexpr Format
asVOP3(Format format
) {
170 return (Format
) ((uint32_t) Format::VOP3
| (uint32_t) format
);
173 constexpr Format
asSDWA(Format format
) {
174 assert(format
== Format::VOP1
|| format
== Format::VOP2
|| format
== Format::VOPC
);
175 return (Format
) ((uint32_t) Format::SDWA
| (uint32_t) format
);
203 /* byte-sized register class */
210 /* these are used for WWM and spills to vgpr */
211 v1_linear
= v1
| (1 << 6),
212 v2_linear
= v2
| (1 << 6),
215 RegClass() = default;
216 constexpr RegClass(RC rc
)
218 constexpr RegClass(RegType type
, unsigned size
)
219 : rc((RC
) ((type
== RegType::vgpr
? 1 << 5 : 0) | size
)) {}
221 constexpr operator RC() const { return rc
; }
222 explicit operator bool() = delete;
224 constexpr RegType
type() const { return rc
<= RC::s16
? RegType::sgpr
: RegType::vgpr
; }
225 constexpr bool is_subdword() const { return rc
& (1 << 7); }
226 constexpr unsigned bytes() const { return ((unsigned) rc
& 0x1F) * (is_subdword() ? 1 : 4); }
227 //TODO: use size() less in favor of bytes()
228 constexpr unsigned size() const { return (bytes() + 3) >> 2; }
229 constexpr bool is_linear() const { return rc
<= RC::s16
|| rc
& (1 << 6); }
230 constexpr RegClass
as_linear() const { return RegClass((RC
) (rc
| (1 << 6))); }
231 constexpr RegClass
as_subdword() const { return RegClass((RC
) (rc
| 1 << 7)); }
237 /* transitional helper expressions */
238 static constexpr RegClass s1
{RegClass::s1
};
239 static constexpr RegClass s2
{RegClass::s2
};
240 static constexpr RegClass s3
{RegClass::s3
};
241 static constexpr RegClass s4
{RegClass::s4
};
242 static constexpr RegClass s8
{RegClass::s8
};
243 static constexpr RegClass s16
{RegClass::s16
};
244 static constexpr RegClass v1
{RegClass::v1
};
245 static constexpr RegClass v2
{RegClass::v2
};
246 static constexpr RegClass v3
{RegClass::v3
};
247 static constexpr RegClass v4
{RegClass::v4
};
248 static constexpr RegClass v5
{RegClass::v5
};
249 static constexpr RegClass v6
{RegClass::v6
};
250 static constexpr RegClass v7
{RegClass::v7
};
251 static constexpr RegClass v8
{RegClass::v8
};
252 static constexpr RegClass v1b
{RegClass::v1b
};
253 static constexpr RegClass v2b
{RegClass::v2b
};
254 static constexpr RegClass v3b
{RegClass::v3b
};
255 static constexpr RegClass v4b
{RegClass::v4b
};
256 static constexpr RegClass v6b
{RegClass::v6b
};
257 static constexpr RegClass v8b
{RegClass::v8b
};
261 * Each temporary virtual register has a
262 * register class (i.e. size and type)
266 Temp() noexcept
: id_(0), reg_class(0) {}
267 constexpr Temp(uint32_t id
, RegClass cls
) noexcept
268 : id_(id
), reg_class(uint8_t(cls
)) {}
270 constexpr uint32_t id() const noexcept
{ return id_
; }
271 constexpr RegClass
regClass() const noexcept
{ return (RegClass::RC
)reg_class
; }
273 constexpr unsigned bytes() const noexcept
{ return regClass().bytes(); }
274 constexpr unsigned size() const noexcept
{ return regClass().size(); }
275 constexpr RegType
type() const noexcept
{ return regClass().type(); }
276 constexpr bool is_linear() const noexcept
{ return regClass().is_linear(); }
278 constexpr bool operator <(Temp other
) const noexcept
{ return id() < other
.id(); }
279 constexpr bool operator==(Temp other
) const noexcept
{ return id() == other
.id(); }
280 constexpr bool operator!=(Temp other
) const noexcept
{ return id() != other
.id(); }
284 uint32_t reg_class
: 8;
289 * Represents the physical register for each
290 * Operand and Definition.
293 constexpr PhysReg() = default;
294 explicit constexpr PhysReg(unsigned r
) : reg_b(r
<< 2) {}
295 constexpr unsigned reg() const { return reg_b
>> 2; }
296 constexpr unsigned byte() const { return reg_b
& 0x3; }
297 constexpr operator unsigned() const { return reg(); }
298 constexpr bool operator==(PhysReg other
) const { return reg_b
== other
.reg_b
; }
299 constexpr bool operator!=(PhysReg other
) const { return reg_b
!= other
.reg_b
; }
300 constexpr bool operator <(PhysReg other
) const { return reg_b
< other
.reg_b
; }
305 /* helper expressions for special registers */
306 static constexpr PhysReg m0
{124};
307 static constexpr PhysReg vcc
{106};
308 static constexpr PhysReg vcc_hi
{107};
309 static constexpr PhysReg sgpr_null
{125}; /* GFX10+ */
310 static constexpr PhysReg exec
{126};
311 static constexpr PhysReg exec_lo
{126};
312 static constexpr PhysReg exec_hi
{127};
313 static constexpr PhysReg vccz
{251};
314 static constexpr PhysReg execz
{252};
315 static constexpr PhysReg scc
{253};
319 * Initially, each Operand refers to either
320 * a temporary virtual register
321 * or to a constant value
322 * Temporary registers get mapped to physical register during RA
323 * Constant values are inlined into the instruction sequence.
329 : reg_(PhysReg
{128}), isTemp_(false), isFixed_(true), isConstant_(false),
330 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
331 isLateKill_(false) {}
333 explicit Operand(Temp r
) noexcept
340 setFixed(PhysReg
{128});
343 explicit Operand(uint32_t v
, bool is64bit
= false) noexcept
347 is64BitConst_
= is64bit
;
349 setFixed(PhysReg
{128 + v
});
350 else if (v
>= 0xFFFFFFF0) /* [-16 .. -1] */
351 setFixed(PhysReg
{192 - v
});
352 else if (v
== 0x3f000000) /* 0.5 */
353 setFixed(PhysReg
{240});
354 else if (v
== 0xbf000000) /* -0.5 */
355 setFixed(PhysReg
{241});
356 else if (v
== 0x3f800000) /* 1.0 */
357 setFixed(PhysReg
{242});
358 else if (v
== 0xbf800000) /* -1.0 */
359 setFixed(PhysReg
{243});
360 else if (v
== 0x40000000) /* 2.0 */
361 setFixed(PhysReg
{244});
362 else if (v
== 0xc0000000) /* -2.0 */
363 setFixed(PhysReg
{245});
364 else if (v
== 0x40800000) /* 4.0 */
365 setFixed(PhysReg
{246});
366 else if (v
== 0xc0800000) /* -4.0 */
367 setFixed(PhysReg
{247});
368 else { /* Literal Constant */
369 assert(!is64bit
&& "attempt to create a 64-bit literal constant");
370 setFixed(PhysReg
{255});
373 explicit Operand(uint64_t v
) noexcept
376 is64BitConst_
= true;
378 data_
.i
= (uint32_t) v
;
379 setFixed(PhysReg
{128 + (uint32_t) v
});
380 } else if (v
>= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
381 data_
.i
= (uint32_t) v
;
382 setFixed(PhysReg
{192 - (uint32_t) v
});
383 } else if (v
== 0x3FE0000000000000) { /* 0.5 */
384 data_
.i
= 0x3f000000;
385 setFixed(PhysReg
{240});
386 } else if (v
== 0xBFE0000000000000) { /* -0.5 */
387 data_
.i
= 0xbf000000;
388 setFixed(PhysReg
{241});
389 } else if (v
== 0x3FF0000000000000) { /* 1.0 */
390 data_
.i
= 0x3f800000;
391 setFixed(PhysReg
{242});
392 } else if (v
== 0xBFF0000000000000) { /* -1.0 */
393 data_
.i
= 0xbf800000;
394 setFixed(PhysReg
{243});
395 } else if (v
== 0x4000000000000000) { /* 2.0 */
396 data_
.i
= 0x40000000;
397 setFixed(PhysReg
{244});
398 } else if (v
== 0xC000000000000000) { /* -2.0 */
399 data_
.i
= 0xc0000000;
400 setFixed(PhysReg
{245});
401 } else if (v
== 0x4010000000000000) { /* 4.0 */
402 data_
.i
= 0x40800000;
403 setFixed(PhysReg
{246});
404 } else if (v
== 0xC010000000000000) { /* -4.0 */
405 data_
.i
= 0xc0800000;
406 setFixed(PhysReg
{247});
407 } else { /* Literal Constant: we don't know if it is a long or double.*/
409 assert(false && "attempt to create a 64-bit literal constant");
412 explicit Operand(RegClass type
) noexcept
415 data_
.temp
= Temp(0, type
);
416 setFixed(PhysReg
{128});
418 explicit Operand(PhysReg reg
, RegClass type
) noexcept
420 data_
.temp
= Temp(0, type
);
424 constexpr bool isTemp() const noexcept
429 constexpr void setTemp(Temp t
) noexcept
{
430 assert(!isConstant_
);
435 constexpr Temp
getTemp() const noexcept
440 constexpr uint32_t tempId() const noexcept
442 return data_
.temp
.id();
445 constexpr bool hasRegClass() const noexcept
447 return isTemp() || isUndefined();
450 constexpr RegClass
regClass() const noexcept
452 return data_
.temp
.regClass();
455 constexpr unsigned bytes() const noexcept
458 return is64BitConst_
? 8 : 4; //TODO: sub-dword constants
460 return data_
.temp
.bytes();
463 constexpr unsigned size() const noexcept
466 return is64BitConst_
? 2 : 1;
468 return data_
.temp
.size();
471 constexpr bool isFixed() const noexcept
476 constexpr PhysReg
physReg() const noexcept
481 constexpr void setFixed(PhysReg reg
) noexcept
483 isFixed_
= reg
!= unsigned(-1);
487 constexpr bool isConstant() const noexcept
492 constexpr bool isLiteral() const noexcept
494 return isConstant() && reg_
== 255;
497 constexpr bool isUndefined() const noexcept
502 constexpr uint32_t constantValue() const noexcept
507 constexpr bool constantEquals(uint32_t cmp
) const noexcept
509 return isConstant() && constantValue() == cmp
;
512 constexpr uint64_t constantValue64(bool signext
=false) const noexcept
517 else if (reg_
<= 208)
518 return 0xFFFFFFFFFFFFFFFF - (reg_
- 193);
522 return 0x3FE0000000000000;
524 return 0xBFE0000000000000;
526 return 0x3FF0000000000000;
528 return 0xBFF0000000000000;
530 return 0x4000000000000000;
532 return 0xC000000000000000;
534 return 0x4010000000000000;
536 return 0xC010000000000000;
539 return (signext
&& (data_
.i
& 0x80000000u
) ? 0xffffffff00000000ull
: 0ull) | data_
.i
;
542 /* Indicates that the killed operand's live range intersects with the
543 * instruction's definitions. Unlike isKill() and isFirstKill(), this is
544 * not set by liveness analysis. */
545 constexpr void setLateKill(bool flag
) noexcept
550 constexpr bool isLateKill() const noexcept
555 constexpr void setKill(bool flag
) noexcept
562 constexpr bool isKill() const noexcept
564 return isKill_
|| isFirstKill();
567 constexpr void setFirstKill(bool flag
) noexcept
574 /* When there are multiple operands killing the same temporary,
575 * isFirstKill() is only returns true for the first one. */
576 constexpr bool isFirstKill() const noexcept
581 constexpr bool isKillBeforeDef() const noexcept
583 return isKill() && !isLateKill();
586 constexpr bool isFirstKillBeforeDef() const noexcept
588 return isFirstKill() && !isLateKill();
591 constexpr bool operator == (Operand other
) const noexcept
593 if (other
.size() != size())
595 if (isFixed() != other
.isFixed() || isKillBeforeDef() != other
.isKillBeforeDef())
597 if (isFixed() && other
.isFixed() && physReg() != other
.physReg())
600 return other
.isLiteral() && other
.constantValue() == constantValue();
601 else if (isConstant())
602 return other
.isConstant() && other
.physReg() == physReg();
603 else if (isUndefined())
604 return other
.isUndefined() && other
.regClass() == regClass();
606 return other
.isTemp() && other
.getTemp() == getTemp();
612 Temp temp
= Temp(0, s1
);
619 uint8_t isConstant_
:1;
622 uint8_t isFirstKill_
:1;
623 uint8_t is64BitConst_
:1;
624 uint8_t isLateKill_
:1;
626 /* can't initialize bit-fields in c++11, so work around using a union */
627 uint8_t control_
= 0;
633 * Definitions are the results of Instructions
634 * and refer to temporary virtual registers
635 * which are later mapped to physical registers
637 class Definition final
640 constexpr Definition() : temp(Temp(0, s1
)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
641 Definition(uint32_t index
, RegClass type
) noexcept
642 : temp(index
, type
) {}
643 explicit Definition(Temp tmp
) noexcept
645 Definition(PhysReg reg
, RegClass type
) noexcept
646 : temp(Temp(0, type
))
650 Definition(uint32_t tmpId
, PhysReg reg
, RegClass type
) noexcept
651 : temp(Temp(tmpId
, type
))
656 constexpr bool isTemp() const noexcept
661 constexpr Temp
getTemp() const noexcept
666 constexpr uint32_t tempId() const noexcept
671 constexpr void setTemp(Temp t
) noexcept
{
675 constexpr RegClass
regClass() const noexcept
677 return temp
.regClass();
680 constexpr unsigned bytes() const noexcept
685 constexpr unsigned size() const noexcept
690 constexpr bool isFixed() const noexcept
695 constexpr PhysReg
physReg() const noexcept
700 constexpr void setFixed(PhysReg reg
) noexcept
706 constexpr void setHint(PhysReg reg
) noexcept
712 constexpr bool hasHint() const noexcept
717 constexpr void setKill(bool flag
) noexcept
722 constexpr bool isKill() const noexcept
728 Temp temp
= Temp(0, s1
);
736 /* can't initialize bit-fields in c++11, so work around using a union */
737 uint8_t control_
= 0;
748 aco::span
<Operand
> operands
;
749 aco::span
<Definition
> definitions
;
751 constexpr bool isVALU() const noexcept
753 return ((uint16_t) format
& (uint16_t) Format::VOP1
) == (uint16_t) Format::VOP1
754 || ((uint16_t) format
& (uint16_t) Format::VOP2
) == (uint16_t) Format::VOP2
755 || ((uint16_t) format
& (uint16_t) Format::VOPC
) == (uint16_t) Format::VOPC
756 || ((uint16_t) format
& (uint16_t) Format::VOP3A
) == (uint16_t) Format::VOP3A
757 || ((uint16_t) format
& (uint16_t) Format::VOP3B
) == (uint16_t) Format::VOP3B
758 || format
== Format::VOP3P
;
761 constexpr bool isSALU() const noexcept
763 return format
== Format::SOP1
||
764 format
== Format::SOP2
||
765 format
== Format::SOPC
||
766 format
== Format::SOPK
||
767 format
== Format::SOPP
;
770 constexpr bool isVMEM() const noexcept
772 return format
== Format::MTBUF
||
773 format
== Format::MUBUF
||
774 format
== Format::MIMG
;
777 constexpr bool isDPP() const noexcept
779 return (uint16_t) format
& (uint16_t) Format::DPP
;
782 constexpr bool isVOP3() const noexcept
784 return ((uint16_t) format
& (uint16_t) Format::VOP3A
) ||
785 ((uint16_t) format
& (uint16_t) Format::VOP3B
);
788 constexpr bool isSDWA() const noexcept
790 return (uint16_t) format
& (uint16_t) Format::SDWA
;
793 constexpr bool isFlatOrGlobal() const noexcept
795 return format
== Format::FLAT
|| format
== Format::GLOBAL
;
798 constexpr bool usesModifiers() const noexcept
;
800 constexpr bool reads_exec() const noexcept
802 for (const Operand
& op
: operands
) {
803 if (op
.isFixed() && op
.physReg() == exec
)
809 static_assert(sizeof(Instruction
) == 16);
811 struct SOPK_instruction
: public Instruction
{
815 static_assert(sizeof(SOPK_instruction
) == sizeof(Instruction
) + 4);
817 struct SOPP_instruction
: public Instruction
{
821 static_assert(sizeof(SOPP_instruction
) == sizeof(Instruction
) + 8);
823 struct SOPC_instruction
: public Instruction
{
825 static_assert(sizeof(SOPC_instruction
) == sizeof(Instruction
) + 0);
827 struct SOP1_instruction
: public Instruction
{
829 static_assert(sizeof(SOP1_instruction
) == sizeof(Instruction
) + 0);
831 struct SOP2_instruction
: public Instruction
{
833 static_assert(sizeof(SOP2_instruction
) == sizeof(Instruction
) + 0);
836 * Scalar Memory Format:
837 * For s_(buffer_)load_dword*:
838 * Operand(0): SBASE - SGPR-pair which provides base address
839 * Operand(1): Offset - immediate (un)signed offset or SGPR
840 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
841 * Operand(n-1): SOffset - SGPR offset (Vega only)
843 * Having no operands is also valid for instructions such as s_dcache_inv.
846 struct SMEM_instruction
: public Instruction
{
847 barrier_interaction barrier
;
848 bool glc
: 1; /* VI+: globally coherent */
849 bool dlc
: 1; /* NAVI: device level coherent */
850 bool nv
: 1; /* VEGA only: Non-volatile */
851 bool can_reorder
: 1;
852 bool disable_wqm
: 1;
853 uint32_t padding
: 19;
855 static_assert(sizeof(SMEM_instruction
) == sizeof(Instruction
) + 4);
857 struct VOP1_instruction
: public Instruction
{
859 static_assert(sizeof(VOP1_instruction
) == sizeof(Instruction
) + 0);
861 struct VOP2_instruction
: public Instruction
{
863 static_assert(sizeof(VOP2_instruction
) == sizeof(Instruction
) + 0);
865 struct VOPC_instruction
: public Instruction
{
867 static_assert(sizeof(VOPC_instruction
) == sizeof(Instruction
) + 0);
869 struct VOP3A_instruction
: public Instruction
{
875 uint32_t padding
: 9;
877 static_assert(sizeof(VOP3A_instruction
) == sizeof(Instruction
) + 8);
879 struct VOP3P_instruction
: public Instruction
{
882 uint8_t opsel_lo
: 3;
883 uint8_t opsel_hi
: 3;
885 uint32_t padding
: 9;
887 static_assert(sizeof(VOP3P_instruction
) == sizeof(Instruction
) + 8);
890 * Data Parallel Primitives Format:
891 * This format can be used for VOP1, VOP2 or VOPC instructions.
892 * The swizzle applies to the src0 operand.
895 struct DPP_instruction
: public Instruction
{
899 uint8_t row_mask
: 4;
900 uint8_t bank_mask
: 4;
902 uint32_t padding
: 7;
904 static_assert(sizeof(DPP_instruction
) == sizeof(Instruction
) + 8);
906 enum sdwa_sel
: uint8_t {
910 sdwa_asuint
= 0x7 | 0x10,
918 /* specific values */
923 sdwa_uword0
= sdwa_isword
| 0,
924 sdwa_uword1
= sdwa_isword
| 1,
927 sdwa_sbyte0
= sdwa_ubyte0
| sdwa_sext
,
928 sdwa_sbyte1
= sdwa_ubyte1
| sdwa_sext
,
929 sdwa_sbyte2
= sdwa_ubyte2
| sdwa_sext
,
930 sdwa_sbyte3
= sdwa_ubyte3
| sdwa_sext
,
931 sdwa_sword0
= sdwa_uword0
| sdwa_sext
,
932 sdwa_sword1
= sdwa_uword1
| sdwa_sext
,
933 sdwa_sdword
= sdwa_udword
| sdwa_sext
,
935 /* register-allocated */
936 sdwa_ubyte
= 1 | sdwa_isra
,
937 sdwa_uword
= 2 | sdwa_isra
,
938 sdwa_sbyte
= sdwa_ubyte
| sdwa_sext
,
939 sdwa_sword
= sdwa_uword
| sdwa_sext
,
943 * Sub-Dword Addressing Format:
944 * This format can be used for VOP1, VOP2 or VOPC instructions.
946 * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
947 * the definition doesn't have to be VCC on GFX9+.
950 struct SDWA_instruction
: public Instruction
{
951 /* these destination modifiers aren't available with VOPC except for
957 bool dst_preserve
: 1;
959 uint8_t omod
: 2; /* GFX9+ */
960 uint32_t padding
: 4;
962 static_assert(sizeof(SDWA_instruction
) == sizeof(Instruction
) + 8);
964 struct Interp_instruction
: public Instruction
{
969 static_assert(sizeof(Interp_instruction
) == sizeof(Instruction
) + 4);
972 * Local and Global Data Sharing instructions
973 * Operand(0): ADDR - VGPR which supplies the address.
974 * Operand(1): DATA0 - First data VGPR.
975 * Operand(2): DATA1 - Second data VGPR.
976 * Operand(n-1): M0 - LDS size.
977 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
980 struct DS_instruction
: public Instruction
{
985 static_assert(sizeof(DS_instruction
) == sizeof(Instruction
) + 4);
988 * Vector Memory Untyped-buffer Instructions
989 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
990 * Operand(1): VADDR - Address source. Can carry an index and/or offset
991 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
992 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
995 struct MUBUF_instruction
: public Instruction
{
996 uint16_t offset
: 12; /* Unsigned byte offset - 12 bit */
997 bool offen
: 1; /* Supply an offset from VGPR (VADDR) */
998 bool idxen
: 1; /* Supply an index from VGPR (VADDR) */
999 bool addr64
: 1; /* SI, CIK: Address size is 64-bit */
1000 bool glc
: 1; /* globally coherent */
1001 bool dlc
: 1; /* NAVI: device level coherent */
1002 bool slc
: 1; /* system level coherent */
1003 bool tfe
: 1; /* texture fail enable */
1004 bool lds
: 1; /* Return read-data to LDS instead of VGPRs */
1005 bool disable_wqm
: 1; /* Require an exec mask without helper invocations */
1006 bool can_reorder
: 1;
1007 uint8_t padding
: 2;
1008 barrier_interaction barrier
;
1010 static_assert(sizeof(MUBUF_instruction
) == sizeof(Instruction
) + 4);
1013 * Vector Memory Typed-buffer Instructions
1014 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1015 * Operand(1): VADDR - Address source. Can carry an index and/or offset
1016 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1017 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1020 struct MTBUF_instruction
: public Instruction
{
1021 uint16_t offset
; /* Unsigned byte offset - 12 bit */
1022 barrier_interaction barrier
;
1023 uint8_t dfmt
: 4; /* Data Format of data in memory buffer */
1024 uint8_t nfmt
: 3; /* Numeric format of data in memory */
1025 bool offen
: 1; /* Supply an offset from VGPR (VADDR) */
1026 bool idxen
: 1; /* Supply an index from VGPR (VADDR) */
1027 bool glc
: 1; /* globally coherent */
1028 bool dlc
: 1; /* NAVI: device level coherent */
1029 bool slc
: 1; /* system level coherent */
1030 bool tfe
: 1; /* texture fail enable */
1031 bool disable_wqm
: 1; /* Require an exec mask without helper invocations */
1032 bool can_reorder
: 1;
1033 uint32_t padding
: 25;
1035 static_assert(sizeof(MTBUF_instruction
) == sizeof(Instruction
) + 8);
1038 * Vector Memory Image Instructions
1039 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
1040 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
1041 * or VDATA - Vector GPR for write data.
1042 * Operand(2): VADDR - Address source. Can carry an offset or an index.
1043 * Definition(0): VDATA - Vector GPR for read result.
1046 struct MIMG_instruction
: public Instruction
{
1047 uint8_t dmask
; /* Data VGPR enable mask */
1048 uint8_t dim
: 3; /* NAVI: dimensionality */
1049 bool unrm
: 1; /* Force address to be un-normalized */
1050 bool dlc
: 1; /* NAVI: device level coherent */
1051 bool glc
: 1; /* globally coherent */
1052 bool slc
: 1; /* system level coherent */
1053 bool tfe
: 1; /* texture fail enable */
1054 bool da
: 1; /* declare an array */
1055 bool lwe
: 1; /* Force data to be un-normalized */
1056 bool r128
: 1; /* NAVI: Texture resource size */
1057 bool a16
: 1; /* VEGA, NAVI: Address components are 16-bits */
1058 bool d16
: 1; /* Convert 32-bit data to 16-bit data */
1059 bool disable_wqm
: 1; /* Require an exec mask without helper invocations */
1060 bool can_reorder
: 1;
1061 uint8_t padding
: 1;
1062 barrier_interaction barrier
;
1064 static_assert(sizeof(MIMG_instruction
) == sizeof(Instruction
) + 4);
1067 * Flat/Scratch/Global Instructions
1070 * Operand(2) / Definition(0): DATA/VDST
1073 struct FLAT_instruction
: public Instruction
{
1074 uint16_t offset
; /* Vega/Navi only */
1075 bool slc
: 1; /* system level coherent */
1076 bool glc
: 1; /* globally coherent */
1077 bool dlc
: 1; /* NAVI: device level coherent */
1080 bool disable_wqm
: 1; /* Require an exec mask without helper invocations */
1081 bool can_reorder
: 1;
1082 uint8_t padding
: 1;
1083 barrier_interaction barrier
;
1085 static_assert(sizeof(FLAT_instruction
) == sizeof(Instruction
) + 4);
1087 struct Export_instruction
: public Instruction
{
1088 uint8_t enabled_mask
;
1090 bool compressed
: 1;
1092 bool valid_mask
: 1;
1093 uint32_t padding
: 13;
1095 static_assert(sizeof(Export_instruction
) == sizeof(Instruction
) + 4);
1097 struct Pseudo_instruction
: public Instruction
{
1098 PhysReg scratch_sgpr
; /* might not be valid if it's not needed */
1102 static_assert(sizeof(Pseudo_instruction
) == sizeof(Instruction
) + 4);
1104 struct Pseudo_branch_instruction
: public Instruction
{
1105 /* target[0] is the block index of the branch target.
1106 * For conditional branches, target[1] contains the fall-through alternative.
1107 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
1111 static_assert(sizeof(Pseudo_branch_instruction
) == sizeof(Instruction
) + 8);
1113 struct Pseudo_barrier_instruction
: public Instruction
{
1115 static_assert(sizeof(Pseudo_barrier_instruction
) == sizeof(Instruction
) + 0);
1117 enum ReduceOp
: uint16_t {
1131 gfx10_wave64_bpermute
1135 * Subgroup Reduction Instructions, everything except for the data to be
1136 * reduced and the result as inserted by setup_reduce_temp().
1137 * Operand(0): data to be reduced
1138 * Operand(1): reduce temporary
1139 * Operand(2): vector temporary
1140 * Definition(0): result
1141 * Definition(1): scalar temporary
1142 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1143 * Definition(3): scc clobber
1144 * Definition(4): vcc clobber
1147 struct Pseudo_reduction_instruction
: public Instruction
{
1149 uint16_t cluster_size
; // must be 0 for scans
1151 static_assert(sizeof(Pseudo_reduction_instruction
) == sizeof(Instruction
) + 4);
1153 struct instr_deleter_functor
{
1154 void operator()(void* p
) {
1159 template<typename T
>
1160 using aco_ptr
= std::unique_ptr
<T
, instr_deleter_functor
>;
1162 template<typename T
>
1163 T
* create_instruction(aco_opcode opcode
, Format format
, uint32_t num_operands
, uint32_t num_definitions
)
1165 std::size_t size
= sizeof(T
) + num_operands
* sizeof(Operand
) + num_definitions
* sizeof(Definition
);
1166 char *data
= (char*) calloc(1, size
);
1167 T
* inst
= (T
*) data
;
1169 inst
->opcode
= opcode
;
1170 inst
->format
= format
;
1172 uint16_t operands_offset
= data
+ sizeof(T
) - (char*)&inst
->operands
;
1173 inst
->operands
= aco::span
<Operand
>(operands_offset
, num_operands
);
1174 uint16_t definitions_offset
= (char*)inst
->operands
.end() - (char*)&inst
->definitions
;
1175 inst
->definitions
= aco::span
<Definition
>(definitions_offset
, num_definitions
);
1180 constexpr bool Instruction::usesModifiers() const noexcept
1182 if (isDPP() || isSDWA())
1185 if (format
== Format::VOP3P
) {
1186 const VOP3P_instruction
*vop3p
= static_cast<const VOP3P_instruction
*>(this);
1187 for (unsigned i
= 0; i
< operands
.size(); i
++) {
1188 if (vop3p
->neg_lo
[i
] || vop3p
->neg_hi
[i
])
1191 return vop3p
->opsel_lo
|| vop3p
->opsel_hi
|| vop3p
->clamp
;
1192 } else if (isVOP3()) {
1193 const VOP3A_instruction
*vop3
= static_cast<const VOP3A_instruction
*>(this);
1194 for (unsigned i
= 0; i
< operands
.size(); i
++) {
1195 if (vop3
->abs
[i
] || vop3
->neg
[i
])
1198 return vop3
->opsel
|| vop3
->clamp
|| vop3
->omod
;
1203 constexpr bool is_phi(Instruction
* instr
)
1205 return instr
->opcode
== aco_opcode::p_phi
|| instr
->opcode
== aco_opcode::p_linear_phi
;
1208 static inline bool is_phi(aco_ptr
<Instruction
>& instr
)
1210 return is_phi(instr
.get());
1213 barrier_interaction
get_barrier_interaction(const Instruction
* instr
);
1215 bool is_dead(const std::vector
<uint16_t>& uses
, Instruction
*instr
);
1218 /* uniform indicates that leaving this block,
1219 * all actives lanes stay active */
1220 block_kind_uniform
= 1 << 0,
1221 block_kind_top_level
= 1 << 1,
1222 block_kind_loop_preheader
= 1 << 2,
1223 block_kind_loop_header
= 1 << 3,
1224 block_kind_loop_exit
= 1 << 4,
1225 block_kind_continue
= 1 << 5,
1226 block_kind_break
= 1 << 6,
1227 block_kind_continue_or_break
= 1 << 7,
1228 block_kind_discard
= 1 << 8,
1229 block_kind_branch
= 1 << 9,
1230 block_kind_merge
= 1 << 10,
1231 block_kind_invert
= 1 << 11,
1232 block_kind_uses_discard_if
= 1 << 12,
1233 block_kind_needs_lowering
= 1 << 13,
1234 block_kind_uses_demote
= 1 << 14,
1235 block_kind_export_end
= 1 << 15,
1239 struct RegisterDemand
{
1240 constexpr RegisterDemand() = default;
1241 constexpr RegisterDemand(const int16_t v
, const int16_t s
) noexcept
1242 : vgpr
{v
}, sgpr
{s
} {}
1246 constexpr friend bool operator==(const RegisterDemand a
, const RegisterDemand b
) noexcept
{
1247 return a
.vgpr
== b
.vgpr
&& a
.sgpr
== b
.sgpr
;
1250 constexpr bool exceeds(const RegisterDemand other
) const noexcept
{
1251 return vgpr
> other
.vgpr
|| sgpr
> other
.sgpr
;
1254 constexpr RegisterDemand
operator+(const Temp t
) const noexcept
{
1255 if (t
.type() == RegType::sgpr
)
1256 return RegisterDemand( vgpr
, sgpr
+ t
.size() );
1258 return RegisterDemand( vgpr
+ t
.size(), sgpr
);
1261 constexpr RegisterDemand
operator+(const RegisterDemand other
) const noexcept
{
1262 return RegisterDemand(vgpr
+ other
.vgpr
, sgpr
+ other
.sgpr
);
1265 constexpr RegisterDemand
operator-(const RegisterDemand other
) const noexcept
{
1266 return RegisterDemand(vgpr
- other
.vgpr
, sgpr
- other
.sgpr
);
1269 constexpr RegisterDemand
& operator+=(const RegisterDemand other
) noexcept
{
1275 constexpr RegisterDemand
& operator-=(const RegisterDemand other
) noexcept
{
1281 constexpr RegisterDemand
& operator+=(const Temp t
) noexcept
{
1282 if (t
.type() == RegType::sgpr
)
1289 constexpr RegisterDemand
& operator-=(const Temp t
) noexcept
{
1290 if (t
.type() == RegType::sgpr
)
1297 constexpr void update(const RegisterDemand other
) noexcept
{
1298 vgpr
= std::max(vgpr
, other
.vgpr
);
1299 sgpr
= std::max(sgpr
, other
.sgpr
);
1308 unsigned offset
= 0;
1309 std::vector
<aco_ptr
<Instruction
>> instructions
;
1310 std::vector
<unsigned> logical_preds
;
1311 std::vector
<unsigned> linear_preds
;
1312 std::vector
<unsigned> logical_succs
;
1313 std::vector
<unsigned> linear_succs
;
1314 RegisterDemand register_demand
= RegisterDemand();
1315 uint16_t loop_nest_depth
= 0;
1317 int logical_idom
= -1;
1318 int linear_idom
= -1;
1319 Temp live_out_exec
= Temp();
1321 /* this information is needed for predecessors to blocks with phis when
1322 * moving out of ssa */
1323 bool scc_live_out
= false;
1324 PhysReg scratch_sgpr
= PhysReg(); /* only needs to be valid if scc_live_out != false */
1326 Block(unsigned idx
) : index(idx
) {}
1327 Block() : index(0) {}
/* A shader stage is described by a bitmask combining one or more software
 * stages (what the application compiled) with exactly one hardware stage
 * (which HW pipeline slot executes it). */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_gs_copy = 1 << 6;
static constexpr Stage sw_mask = 0x7f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 7;
static constexpr Stage hw_es = 1 << 8; /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_gs = 1 << 9; /* Geometry shader on GFX10/legacy and GFX6-9. */
static constexpr Stage hw_ngg_gs = 1 << 10; /* Geometry shader on GFX10/NGG. */
static constexpr Stage hw_ls = 1 << 11; /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_hs = 1 << 12; /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
static constexpr Stage hw_fs = 1 << 13;
static constexpr Stage hw_cs = 1 << 14;
static constexpr Stage hw_mask = 0xff << 7;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;

/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_ngg_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_ngg_gs;
static constexpr Stage ngg_tess_eval_gs = sw_tes | hw_ngg_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_ngg_gs;

/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;

/* pre-GFX9 (separate shader binaries per stage) */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
/* NOTE(review): fragment of the `statistic` enum that indexes
 * Program::statistics[]. The enum header, several earlier enumerators and
 * the terminating `num_statistics` entry (referenced by Program below) fall
 * in gaps of this chunk — do not reconstruct without the full header. */
1377 statistic_instructions
,
1381 statistic_vmem_clauses
,
1382 statistic_smem_clauses
,
1383 statistic_vmem_score
,
1384 statistic_smem_score
,
1385 statistic_sgpr_presched
,
1386 statistic_vgpr_presched
,
1390 class Program final
{
1392 float_mode next_fp_mode
;
1393 std::vector
<Block
> blocks
;
1394 RegisterDemand max_reg_demand
= RegisterDemand();
1395 uint16_t num_waves
= 0;
1396 uint16_t max_waves
= 0; /* maximum number of waves, regardless of register usage */
1397 ac_shader_config
* config
;
1398 struct radv_shader_info
*info
;
1399 enum chip_class chip_class
;
1400 enum radeon_family family
;
1403 Stage stage
; /* Stage */
1404 bool needs_exact
= false; /* there exists an instruction with disable_wqm = true */
1405 bool needs_wqm
= false; /* there exists a p_wqm instruction */
1406 bool wb_smem_l1_on_end
= false;
1408 std::vector
<uint8_t> constant_data
;
1409 Temp private_segment_buffer
;
1410 Temp scratch_offset
;
1412 uint16_t min_waves
= 0;
1413 uint16_t lds_alloc_granule
;
1414 uint32_t lds_limit
; /* in bytes */
1415 bool has_16bank_lds
;
1416 uint16_t vgpr_limit
;
1417 uint16_t sgpr_limit
;
1418 uint16_t physical_sgprs
;
1419 uint16_t sgpr_alloc_granule
; /* minus one. must be power of two */
1420 uint16_t vgpr_alloc_granule
; /* minus one. must be power of two */
1421 unsigned workgroup_size
; /* if known; otherwise UINT_MAX */
1423 bool xnack_enabled
= false;
1425 bool needs_vcc
= false;
1426 bool needs_flat_scr
= false;
1428 bool collect_statistics
= false;
1429 uint32_t statistics
[num_statistics
];
1431 uint32_t allocateId()
1433 assert(allocationID
<= 16777215);
1434 return allocationID
++;
1437 uint32_t peekAllocationId()
1439 return allocationID
;
1442 void setAllocationId(uint32_t id
)
1447 Block
* create_and_insert_block() {
1448 blocks
.emplace_back(blocks
.size());
1449 blocks
.back().fp_mode
= next_fp_mode
;
1450 return &blocks
.back();
1453 Block
* insert_block(Block
&& block
) {
1454 block
.index
= blocks
.size();
1455 block
.fp_mode
= next_fp_mode
;
1456 blocks
.emplace_back(std::move(block
));
1457 return &blocks
.back();
1461 uint32_t allocationID
= 1;
/* NOTE(review): fragment of `struct TempHash`, the hash functor used by
 * TempSet below; the struct's opening line and the operator() body fall in
 * gaps of this chunk — presumably it hashes by the temp's id, but confirm
 * against the full header. */
1465 std::size_t operator()(Temp t
) const {
1469 using TempSet
= std::unordered_set
<Temp
, TempHash
>;
1472 /* live temps out per block */
1473 std::vector
<TempSet
> live_out
;
1474 /* register demand (sgpr/vgpr) per instruction per block */
1475 std::vector
<std::vector
<RegisterDemand
>> register_demand
;
1478 void select_program(Program
*program
,
1479 unsigned shader_count
,
1480 struct nir_shader
*const *shaders
,
1481 ac_shader_config
* config
,
1482 struct radv_shader_args
*args
);
1483 void select_gs_copy_shader(Program
*program
, struct nir_shader
*gs_shader
,
1484 ac_shader_config
* config
,
1485 struct radv_shader_args
*args
);
1487 void lower_wqm(Program
* program
, live
& live_vars
,
1488 const struct radv_nir_compiler_options
*options
);
1489 void lower_phis(Program
* program
);
1490 void calc_min_waves(Program
* program
);
1491 void update_vgpr_sgpr_demand(Program
* program
, const RegisterDemand new_demand
);
1492 live
live_var_analysis(Program
* program
, const struct radv_nir_compiler_options
*options
);
1493 std::vector
<uint16_t> dead_code_analysis(Program
*program
);
1494 void dominator_tree(Program
* program
);
1495 void insert_exec_mask(Program
*program
);
1496 void value_numbering(Program
* program
);
1497 void optimize(Program
* program
);
1498 void setup_reduce_temp(Program
* program
);
1499 void lower_to_cssa(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
);
1500 void register_allocation(Program
*program
, std::vector
<TempSet
>& live_out_per_block
);
1501 void ssa_elimination(Program
* program
);
1502 void lower_to_hw_instr(Program
* program
);
1503 void schedule_program(Program
* program
, live
& live_vars
);
1504 void spill(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
);
1505 void insert_wait_states(Program
* program
);
1506 void insert_NOPs(Program
* program
);
1507 unsigned emit_program(Program
* program
, std::vector
<uint32_t>& code
);
1508 void print_asm(Program
*program
, std::vector
<uint32_t>& binary
,
1509 unsigned exec_size
, std::ostream
& out
);
1510 void validate(Program
* program
, FILE *output
);
1511 bool validate_ra(Program
* program
, const struct radv_nir_compiler_options
*options
, FILE *output
);
/* NOTE(review): the preprocessor conditional that selects between the real
 * perfwarn() declaration and the no-op macro fallback (e.g. an
 * `#ifndef NDEBUG` / `#else` / `#endif` pair) falls in gaps of this chunk;
 * as visible here both forms would be active at once — do not edit without
 * the full header. */
1513 void perfwarn(bool cond
, const char *msg
, Instruction
*instr
=NULL
);
1515 #define perfwarn(program, cond, msg, ...) do {} while(0)
1518 void collect_presched_stats(Program
*program
);
1519 void collect_preasm_stats(Program
*program
);
1520 void collect_postasm_stats(Program
*program
, const std::vector
<uint32_t>& code
);
1522 void aco_print_instr(Instruction
*instr
, FILE *output
);
1523 void aco_print_program(Program
*program
, FILE *output
);
1525 /* utilities for dealing with register demand */
1526 RegisterDemand
get_live_changes(aco_ptr
<Instruction
>& instr
);
1527 RegisterDemand
get_temp_registers(aco_ptr
<Instruction
>& instr
);
1528 RegisterDemand
get_demand_before(RegisterDemand demand
, aco_ptr
<Instruction
>& instr
, aco_ptr
<Instruction
>& instr_before
);
1530 /* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
1531 uint16_t get_extra_sgprs(Program
*program
);
1533 /* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
1534 uint16_t get_sgpr_alloc(Program
*program
, uint16_t addressable_sgprs
);
1535 uint16_t get_vgpr_alloc(Program
*program
, uint16_t addressable_vgprs
);
1537 /* return number of addressable sgprs/vgprs for max_waves */
1538 uint16_t get_addr_sgpr_from_waves(Program
*program
, uint16_t max_waves
);
1539 uint16_t get_addr_vgpr_from_waves(Program
*program
, uint16_t max_waves
);
1542 const int16_t opcode_gfx7
[static_cast<int>(aco_opcode::num_opcodes
)];
1543 const int16_t opcode_gfx9
[static_cast<int>(aco_opcode::num_opcodes
)];
1544 const int16_t opcode_gfx10
[static_cast<int>(aco_opcode::num_opcodes
)];
1545 const std::bitset
<static_cast<int>(aco_opcode::num_opcodes
)> can_use_input_modifiers
;
1546 const std::bitset
<static_cast<int>(aco_opcode::num_opcodes
)> can_use_output_modifiers
;
1547 const std::bitset
<static_cast<int>(aco_opcode::num_opcodes
)> is_atomic
;
1548 const char *name
[static_cast<int>(aco_opcode::num_opcodes
)];
1549 const aco::Format format
[static_cast<int>(aco_opcode::num_opcodes
)];
1552 extern const Info instr_info
;
1556 #endif /* ACO_IR_H */