return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
};
+constexpr Format asSDWA(Format format) {
+ assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
+ return (Format) ((uint32_t) Format::SDWA | (uint32_t) format);
+}
+
enum class RegType {
none = 0,
sgpr,
v6 = 6 | (1 << 5),
v7 = 7 | (1 << 5),
v8 = 8 | (1 << 5),
+ /* byte-sized register class */
+ v1b = v1 | (1 << 7),
+ v2b = v2 | (1 << 7),
+ v3b = v3 | (1 << 7),
+ v4b = v4 | (1 << 7),
+ v6b = v6 | (1 << 7),
+ v8b = v8 | (1 << 7),
/* these are used for WWM and spills to vgpr */
v1_linear = v1 | (1 << 6),
v2_linear = v2 | (1 << 6),
explicit operator bool() = delete;
constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
- constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
+ constexpr bool is_subdword() const { return rc & (1 << 7); }
+ constexpr unsigned bytes() const { return ((unsigned) rc & 0x1F) * (is_subdword() ? 1 : 4); }
+ //TODO: use size() less in favor of bytes()
+ constexpr unsigned size() const { return (bytes() + 3) >> 2; }
constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
+ constexpr RegClass as_subdword() const { return RegClass((RC) (rc | 1 << 7)); }
private:
RC rc;
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
+static constexpr RegClass v1b{RegClass::v1b};
+static constexpr RegClass v2b{RegClass::v2b};
+static constexpr RegClass v3b{RegClass::v3b};
+static constexpr RegClass v4b{RegClass::v4b};
+static constexpr RegClass v6b{RegClass::v6b};
+static constexpr RegClass v8b{RegClass::v8b};
/**
* Temp Class
* and SSA id.
*/
struct Temp {
- Temp() = default;
+ Temp() noexcept : id_(0), reg_class(0) {}
constexpr Temp(uint32_t id, RegClass cls) noexcept
- : id_(id), reg_class(cls) {}
+ : id_(id), reg_class(uint8_t(cls)) {}
constexpr uint32_t id() const noexcept { return id_; }
- constexpr RegClass regClass() const noexcept { return reg_class; }
+ constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }
- constexpr unsigned size() const noexcept { return reg_class.size(); }
- constexpr RegType type() const noexcept { return reg_class.type(); }
- constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
+ constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
+ constexpr unsigned size() const noexcept { return regClass().size(); }
+ constexpr RegType type() const noexcept { return regClass().type(); }
+ constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
private:
- uint32_t id_:24;
- RegClass reg_class;
+ uint32_t id_: 24;
+ uint32_t reg_class : 8;
};
/**
constexpr unsigned reg() const { return reg_b >> 2; }
constexpr unsigned byte() const { return reg_b & 0x3; }
constexpr operator unsigned() const { return reg(); }
+ constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
+ constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
+ constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
uint16_t reg_b = 0;
};
return data_.temp.regClass();
}
+ constexpr unsigned bytes() const noexcept
+ {
+ if (isConstant())
+ return is64BitConst_ ? 8 : 4; //TODO: sub-dword constants
+ else
+ return data_.temp.bytes();
+ }
+
constexpr unsigned size() const noexcept
{
if (isConstant())
return temp.regClass();
}
+ constexpr unsigned bytes() const noexcept
+ {
+ return temp.bytes();
+ }
+
constexpr unsigned size() const noexcept
{
return temp.size();
bool bound_ctrl : 1;
};
+enum sdwa_sel : uint8_t {
+ /* masks */
+ sdwa_wordnum = 0x1,
+ sdwa_bytenum = 0x3,
+ sdwa_asuint = 0x7 | 0x10,
+ sdwa_rasize = 0x3,
+
+ /* flags */
+ sdwa_isword = 0x4,
+ sdwa_sext = 0x8,
+ sdwa_isra = 0x10,
+
+ /* specific values */
+ sdwa_ubyte0 = 0,
+ sdwa_ubyte1 = 1,
+ sdwa_ubyte2 = 2,
+ sdwa_ubyte3 = 3,
+ sdwa_uword0 = sdwa_isword | 0,
+ sdwa_uword1 = sdwa_isword | 1,
+ sdwa_udword = 6,
+
+ sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
+ sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
+ sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
+ sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
+ sdwa_sword0 = sdwa_uword0 | sdwa_sext,
+ sdwa_sword1 = sdwa_uword1 | sdwa_sext,
+ sdwa_sdword = sdwa_udword | sdwa_sext,
+
+ /* register-allocated */
+ sdwa_ubyte = 1 | sdwa_isra,
+ sdwa_uword = 2 | sdwa_isra,
+ sdwa_sbyte = sdwa_ubyte | sdwa_sext,
+ sdwa_sword = sdwa_uword | sdwa_sext,
+};
+
+/**
+ * Sub-Dword Addressing Format:
+ * This format can be used for VOP1, VOP2 or VOPC instructions.
+ *
+ * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
+ * the definition doesn't have to be VCC on GFX9+.
+ *
+ */
+struct SDWA_instruction : public Instruction {
+ /* these destination modifiers aren't available with VOPC except for
+ * clamp on GFX8 */
+ unsigned dst_sel:8;
+ bool dst_preserve:1;
+ bool clamp:1;
+ unsigned omod:2; /* GFX9+ */
+
+ unsigned sel[2];
+ bool neg[2];
+ bool abs[2];
+};
+
struct Interp_instruction : public Instruction {
uint8_t attribute;
uint8_t component;
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 7;
-static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_gs = 1 << 9;
-static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_hs = 1 << 11;
-static constexpr Stage hw_fs = 1 << 12;
-static constexpr Stage hw_cs = 1 << 13;
-static constexpr Stage hw_mask = 0x7f << 7;
+static constexpr Stage hw_es = 1 << 8; /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_gs = 1 << 9; /* Geometry shader on GFX10/legacy and GFX6-9. */
+static constexpr Stage hw_ngg_gs = 1 << 10; /* Geometry shader on GFX10/NGG. */
+static constexpr Stage hw_ls = 1 << 11; /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_hs = 1 << 12; /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
+static constexpr Stage hw_fs = 1 << 13;
+static constexpr Stage hw_cs = 1 << 14;
+static constexpr Stage hw_mask = 0xff << 7;
/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
-static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
-static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
-static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
-static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
+static constexpr Stage ngg_vertex_gs = sw_vs | hw_ngg_gs;
+static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_ngg_gs;
+static constexpr Stage ngg_tess_eval_gs = sw_tes | hw_ngg_gs;
+static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_ngg_gs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;