X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_ir.h;h=b05f809d21fb7ebe32acdd9b1a23c10a7031afd3;hb=f01bf51a2b5769aa7bb71f3c76f700c3b4257ac1;hp=a62525016fb27608287c053f169bad90b7ed3deb;hpb=507956ed04fcdcfd44419d1b16f032e1d81d0dcb;p=mesa.git

diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index a62525016fb..b05f809d21f 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -169,6 +169,11 @@ constexpr Format asVOP3(Format format) {
    return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
 };
 
+constexpr Format asSDWA(Format format) {
+   assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
+   return (Format) ((uint32_t) Format::SDWA | (uint32_t) format);
+}
+
 enum class RegType {
    none = 0,
    sgpr,
@@ -194,6 +199,13 @@ struct RegClass {
       v6 = 6  | (1 << 5),
       v7 = 7  | (1 << 5),
       v8 = 8  | (1 << 5),
+      /* byte-sized register class */
+      v1b = v1 | (1 << 7),
+      v2b = v2 | (1 << 7),
+      v3b = v3 | (1 << 7),
+      v4b = v4 | (1 << 7),
+      v6b = v6 | (1 << 7),
+      v8b = v8 | (1 << 7),
       /* these are used for WWM and spills to vgpr */
       v1_linear = v1 | (1 << 6),
       v2_linear = v2 | (1 << 6),
@@ -209,9 +221,13 @@ struct RegClass {
    explicit operator bool() = delete;
 
    constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
-   constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
+   constexpr bool is_subdword() const { return rc & (1 << 7); }
+   constexpr unsigned bytes() const { return ((unsigned) rc & 0x1F) * (is_subdword() ? 1 : 4); }
+   //TODO: use size() less in favor of bytes()
+   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
    constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
    constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
+   constexpr RegClass as_subdword() const { return RegClass((RC) (rc | 1 << 7)); }
 
 private:
    RC rc;
@@ -232,6 +248,12 @@ static constexpr RegClass v5{RegClass::v5};
 static constexpr RegClass v6{RegClass::v6};
 static constexpr RegClass v7{RegClass::v7};
 static constexpr RegClass v8{RegClass::v8};
+static constexpr RegClass v1b{RegClass::v1b};
+static constexpr RegClass v2b{RegClass::v2b};
+static constexpr RegClass v3b{RegClass::v3b};
+static constexpr RegClass v4b{RegClass::v4b};
+static constexpr RegClass v6b{RegClass::v6b};
+static constexpr RegClass v8b{RegClass::v8b};
 
 /**
  * Temp Class
@@ -247,6 +269,7 @@ struct Temp {
    constexpr uint32_t id() const noexcept { return id_; }
    constexpr RegClass regClass() const noexcept { return reg_class; }
 
+   constexpr unsigned bytes() const noexcept { return reg_class.bytes(); }
    constexpr unsigned size() const noexcept { return reg_class.size(); }
    constexpr RegType type() const noexcept { return reg_class.type(); }
    constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
@@ -267,10 +290,15 @@ private:
  */
 struct PhysReg {
    constexpr PhysReg() = default;
-   explicit constexpr PhysReg(unsigned r) : reg(r) {}
-   constexpr operator unsigned() const { return reg; }
-
-   uint16_t reg = 0;
+   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
+   constexpr unsigned reg() const { return reg_b >> 2; }
+   constexpr unsigned byte() const { return reg_b & 0x3; }
+   constexpr operator unsigned() const { return reg(); }
+   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
+   constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
+   constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
+
+   uint16_t reg_b = 0;
 };
 
 /* helper expressions for special registers */
@@ -423,6 +451,14 @@ public:
       return data_.temp.regClass();
    }
 
+   constexpr unsigned bytes() const noexcept
+   {
+      if (isConstant())
+         return is64BitConst_ ? 8 : 4; //TODO: sub-dword constants
+      else
+         return data_.temp.bytes();
+   }
+
    constexpr unsigned size() const noexcept
    {
       if (isConstant())
@@ -475,12 +511,12 @@ public:
    constexpr uint64_t constantValue64(bool signext=false) const noexcept
    {
       if (is64BitConst_) {
-         if (reg_.reg <= 192)
-            return reg_.reg - 128;
-         else if (reg_.reg <= 208)
-            return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
+         if (reg_ <= 192)
+            return reg_ - 128;
+         else if (reg_ <= 208)
+            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
 
-         switch (reg_.reg) {
+         switch (reg_) {
          case 240:
             return 0x3FE0000000000000;
          case 241:
@@ -640,6 +676,11 @@ public:
       return temp.regClass();
    }
 
+   constexpr unsigned bytes() const noexcept
+   {
+      return temp.bytes();
+   }
+
    constexpr unsigned size() const noexcept
    {
       return temp.size();
@@ -836,6 +877,63 @@ struct DPP_instruction : public Instruction {
    bool bound_ctrl : 1;
 };
 
+enum sdwa_sel : uint8_t {
+    /* masks */
+    sdwa_wordnum = 0x1,
+    sdwa_bytenum = 0x3,
+    sdwa_asuint = 0x7 | 0x10,
+    sdwa_rasize = 0x3,
+
+    /* flags */
+    sdwa_isword = 0x4,
+    sdwa_sext = 0x8,
+    sdwa_isra = 0x10,
+
+    /* specific values */
+    sdwa_ubyte0 = 0,
+    sdwa_ubyte1 = 1,
+    sdwa_ubyte2 = 2,
+    sdwa_ubyte3 = 3,
+    sdwa_uword0 = sdwa_isword | 0,
+    sdwa_uword1 = sdwa_isword | 1,
+    sdwa_udword = 6,
+
+    sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
+    sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
+    sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
+    sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
+    sdwa_sword0 = sdwa_uword0 | sdwa_sext,
+    sdwa_sword1 = sdwa_uword1 | sdwa_sext,
+    sdwa_sdword = sdwa_udword | sdwa_sext,
+
+    /* register-allocated */
+    sdwa_ubyte = 1 | sdwa_isra,
+    sdwa_uword = 2 | sdwa_isra,
+    sdwa_sbyte = sdwa_ubyte | sdwa_sext,
+    sdwa_sword = sdwa_uword | sdwa_sext,
+};
+
+/**
+ * Sub-Dword Addressing Format:
+ * This format can be used for VOP1, VOP2 or VOPC instructions.
+ *
+ * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
+ * the definition doesn't have to be VCC on GFX9+.
+ *
+ */
+struct SDWA_instruction : public Instruction {
+   /* these destination modifiers aren't available with VOPC except for
+    * clamp on GFX8 */
+   unsigned dst_sel:8;
+   bool dst_preserve:1;
+   bool clamp:1;
+   unsigned omod:2; /* GFX9+ */
+
+   unsigned sel[2];
+   bool neg[2];
+   bool abs[2];
+};
+
 struct Interp_instruction : public Instruction {
    uint8_t attribute;
    uint8_t component;