aco: Initial GFX7 Support

author Daniel Schürmann <daniel@schuermann.dev>

Mon, 4 Nov 2019 17:02:47 +0000 (18:02 +0100)

committer Daniel Schürmann <daniel@schuermann.dev>

Sat, 7 Dec 2019 10:23:11 +0000 (11:23 +0100)
author Daniel Schürmann <daniel@schuermann.dev>
Mon, 4 Nov 2019 17:02:47 +0000 (18:02 +0100)
committer Daniel Schürmann <daniel@schuermann.dev>
Sat, 7 Dec 2019 10:23:11 +0000 (11:23 +0100)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp

index 54dc520bb192e269eddd75b3da2fcd10b0b61f9a..bc905f9f969cafdb6e1cd9b29dac49e49e1ed207 100644 (file)
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -17,7 +17,9 @@ struct asm_context {
     // TODO: keep track of branch instructions referring blocks
     // and, when emitting the block, correct the offset in instr
     asm_context(Program* program) : program(program), chip_class(program->chip_class) {
-      if (chip_class <= GFX9)
+      if (chip_class <= GFX7)
+         opcode = &instr_info.opcode_gfx7[0];
+      else if (chip_class <= GFX9)
           opcode = &instr_info.opcode_gfx9[0];
        else if (chip_class == GFX10)
           opcode = &instr_info.opcode_gfx10[0];
@@ -145,9 +147,26 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
        SMEM_instruction* smem = static_cast<SMEM_instruction*>(instr);
        bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
        bool is_load = !instr->definitions.empty();
-
        uint32_t encoding = 0;
  
+      if (ctx.chip_class <= GFX7) {
+         encoding = (0b11000 << 27);
+         encoding |= opcode << 22;
+         encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0;
+         encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0;
+         if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) {
+            encoding |= instr->operands[1].physReg().reg;
+         } else {
+            encoding |= instr->operands[1].constantValue() >> 2;
+            encoding |= 1 << 8;
+         }
+         out.push_back(encoding);
+         /* SMRD instructions can take a literal on GFX6 & GFX7 */
+         if (instr->operands[1].isConstant() && instr->operands[1].constantValue() >= 1024)
+            out.push_back(instr->operands[1].constantValue() >> 2);
+         return;
+      }
+
        if (ctx.chip_class <= GFX9) {
           encoding = (0b110000 << 26);
           assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */
@@ -291,7 +310,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
        encoding |= (mubuf->glc ? 1 : 0) << 14;
        encoding |= (mubuf->idxen ? 1 : 0) << 13;
        encoding |= (mubuf->offen ? 1 : 0) << 12;
-      if (ctx.chip_class <= GFX9) {
+      if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
           assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */
           encoding |= (mubuf->slc ? 1 : 0) << 17;
        } else if (ctx.chip_class >= GFX10) {
@@ -326,7 +345,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
        encoding |= 0x0FFF & mtbuf->offset;
        encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
  
-      if (ctx.chip_class <= GFX9) {
+      if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
           encoding |= opcode << 15;
        } else {
           encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */
@@ -444,9 +463,9 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
     case Format::EXP: {
        Export_instruction* exp = static_cast<Export_instruction*>(instr);
        uint32_t encoding;
-      if (ctx.chip_class <= GFX9) {
+      if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
           encoding = (0b110001 << 26);
-      } else if (ctx.chip_class >= GFX10) {
+      } else {
           encoding = (0b111110 << 26);
        }
  
@@ -473,12 +492,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
           if ((uint16_t) instr->format & (uint16_t) Format::VOP2) {
              opcode = opcode + 0x100;
           } else if ((uint16_t) instr->format & (uint16_t) Format::VOP1) {
-            if (ctx.chip_class <= GFX9) {
+            if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9)
                 opcode = opcode + 0x140;
-            } else {
-               /* RDNA ISA doc says this is 0x140, but that doesn't work  */
+            else
                 opcode = opcode + 0x180;
-            }
           } else if ((uint16_t) instr->format & (uint16_t) Format::VOPC) {
              opcode = opcode + 0x0;
           } else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP) {
@@ -492,8 +509,13 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
              encoding = (0b110101 << 26);
           }
  
-         encoding |= opcode << 16;
-         encoding |= (vop3->clamp ? 1 : 0) << 15;
+         if (ctx.chip_class <= GFX7) {
+            encoding |= opcode << 17;
+            encoding |= (vop3->clamp ? 1 : 0) << 11;
+         } else {
+            encoding |= opcode << 16;
+            encoding |= (vop3->clamp ? 1 : 0) << 15;
+         }
           for (unsigned i = 0; i < 3; i++)
              encoding |= vop3->abs[i] << (8+i);
           for (unsigned i = 0; i < 4; i++)
@@ -515,6 +537,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
           out.push_back(encoding);
  
        } else if (instr->isDPP()){
+         assert(ctx.chip_class >= GFX8);
           /* first emit the instruction without the DPP operand */
           Operand dpp_op = instr->operands[0];
           instr->operands[0] = Operand(PhysReg{250}, v1);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h

index 1f4721f5ffdf31ca236644bdca39b68595054c8d..10661858ca0085cabfa017d54110263429598c7d 100644 (file)
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1259,6 +1259,7 @@ uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
  uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
  
  typedef struct {
+   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
     const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
     const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
     const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py

index a4b02507eda7c2149b2281d246c1d0e94e211757..f9697420ae05399f0791a9e6c49311ecf5482ad2 100644 (file)
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -155,7 +155,7 @@ class Opcode(object):
     """Class that represents all the information we have about the opcode
     NOTE: this must be kept in sync with aco_op_info
     """
-   def __init__(self, name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod):
+   def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod):
        """Parameters:
  
        - name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -167,6 +167,7 @@ class Opcode(object):
          constant value of the opcode given the constant values of its inputs.
        """
        assert isinstance(name, str)
+      assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
@@ -174,6 +175,7 @@ class Opcode(object):
        assert isinstance(output_mod, bool)
  
        self.name = name
+      self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
@@ -184,14 +186,11 @@ class Opcode(object):
  # global dictionary of opcodes
  opcodes = {}
  
-# VOPC to GFX6 opcode translation map
-VOPC_GFX6 = [0] * 256
-
-def opcode(name, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False):
+def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False):
     assert name not in opcodes
-   opcodes[name] = Opcode(name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod)
+   opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod)
  
-opcode("exp", 0, 0, format = Format.EXP)
+opcode("exp", 0, 0, 0, format = Format.EXP)
  opcode("p_parallelcopy")
  opcode("p_startpgm")
  opcode("p_phi")
@@ -302,7 +301,7 @@ SOP2 = {
     (  -1,   -1,   -1, 0x2d, 0x36, "s_mul_hi_i32"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2:
-    opcode(name, gfx9, gfx10, Format.SOP2)
+    opcode(name, gfx7, gfx9, gfx10, Format.SOP2)
  
  
  # SOPK instructions: 0 input (+ imm), 1 output + optional scc
@@ -338,7 +337,7 @@ SOPK = {
     (  -1,   -1,   -1,   -1, 0x1c, "s_subvector_loop_end"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPK:
-   opcode(name, gfx9, gfx10, Format.SOPK)
+   opcode(name, gfx7, gfx9, gfx10, Format.SOPK)
  
  
  # SOP1 instructions: 1 input, 1 output (+optional SCC)
@@ -416,7 +415,7 @@ SOP1 = {
     (  -1,   -1,   -1,   -1,   -1, "p_constaddr"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1:
-   opcode(name, gfx9, gfx10, Format.SOP1)
+   opcode(name, gfx7, gfx9, gfx10, Format.SOP1)
  
  
  # SOPC instructions: 2 inputs and 0 outputs (+SCC)
@@ -444,7 +443,7 @@ SOPC = {
     (  -1,   -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC:
-   opcode(name, gfx9, gfx10, Format.SOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.SOPC)
  
  
  # SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
@@ -491,7 +490,7 @@ SOPP = {
     (  -1,   -1,   -1,   -1, 0x26, "s_ttracedata_imm"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPP:
-   opcode(name, gfx9, gfx10, Format.SOPP)
+   opcode(name, gfx7, gfx9, gfx10, Format.SOPP)
  
  
  # SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
@@ -585,7 +584,7 @@ SMEM = {
     (  -1,   -1,   -1, 0xac, 0xac, "s_atomic_dec_x2"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
-   opcode(name, gfx9, gfx10, Format.SMEM)
+   opcode(name, gfx7, gfx9, gfx10, Format.SMEM)
  
  
  # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
@@ -663,7 +662,7 @@ VOP2 = {
     (  -1,   -1,   -1,   -1, 0x3c, "v_pk_fmac_f16", False),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2:
-   opcode(name, gfx9, gfx10, Format.VOP2, modifiers, modifiers)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOP2, modifiers, modifiers)
  
  
  # VOP1 instructions: instructions with 1 input and 1 output
@@ -763,7 +762,7 @@ VOP1 = {
     (  -1,   -1,   -1,   -1, 0x68, "v_swaprel_b32", False, False),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP1:
-   opcode(name, gfx9, gfx10, Format.VOP1, in_mod, out_mod)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOP1, in_mod, out_mod)
  
  
  # VOPC instructions:
@@ -777,29 +776,29 @@ VOPC_CLASS = {
     (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in VOPC_CLASS:
-    opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+    opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  
  COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]
  
  for i in range(8):
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
  
  for i in range(16):
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64")
-   opcode(name, gfx9, gfx10, Format.VOPC, True, False)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
     # GFX_6_7
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
@@ -811,41 +810,41 @@ COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]
  # GFX_8_9
  for i in [0,7]: # only 0 and 7
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  
  for i in range(1, 7): # [1..6]
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  
  for i in range(8):
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
     (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64")
-   opcode(name, gfx9, gfx10, Format.VOPC)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
  
  
  # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
@@ -876,7 +875,7 @@ VOPP = {
  # note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
  for (code, name) in VOPP:
-   opcode(name, code, code, Format.VOP3P)
+   opcode(name, -1, code, code, Format.VOP3P)
  
  
  # VINTERP instructions: 
@@ -887,7 +886,7 @@ VINTRP = {
  }
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
  for (code, name) in VINTRP:
-   opcode(name, code, code, Format.VINTRP)
+   opcode(name, code, code, code, Format.VINTRP)
  
  # VOP3 instructions: 3 inputs, 1 output
  # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
@@ -1015,7 +1014,7 @@ VOP3 = {
  # TODO: many 16bit instructions moved from VOP2 to VOP3 on GFX10
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
-   opcode(name, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
  
  
  # DS instructions: 3 inputs (1 addr, 2 data), 1 output
@@ -1177,7 +1176,7 @@ DS = {
     (  -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS:
-    opcode(name, gfx9, gfx10, Format.DS)
+    opcode(name, gfx7, gfx9, gfx10, Format.DS)
  
  # MUBUF instructions:
  MUBUF = {
@@ -1262,7 +1261,7 @@ MUBUF = {
     (  -1,   -1,   -1,   -1, 0x72, "buffer_gl1_inv"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
-    opcode(name, gfx9, gfx10, Format.MUBUF)
+    opcode(name, gfx7, gfx9, gfx10, Format.MUBUF)
  
  MTBUF = {
     (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
@@ -1283,7 +1282,7 @@ MTBUF = {
     (  -1,   -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
  }
  for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF:
-    opcode(name, gfx9, gfx10, Format.MTBUF)
+    opcode(name, gfx7, gfx9, gfx10, Format.MTBUF)
  
  
  IMAGE = {
@@ -1302,7 +1301,7 @@ IMAGE = {
  }
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
  for (code, name) in IMAGE:
-   opcode(name, code, code, Format.MIMG)
+   opcode(name, code, code, code, Format.MIMG)
  
  IMAGE_ATOMIC = {
     (0x0f, 0x0f, 0x10, "image_atomic_swap"),
@@ -1326,7 +1325,7 @@ IMAGE_ATOMIC = {
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
  # gfx7 and gfx10 opcodes are the same here
  for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
-   opcode(name, gfx89, gfx7, Format.MIMG)
+   opcode(name, gfx7, gfx89, gfx7, Format.MIMG)
  
  IMAGE_SAMPLE = {
     (0x20, "image_sample"),
@@ -1372,7 +1371,7 @@ IMAGE_SAMPLE = {
  }
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
  for (code, name) in IMAGE_SAMPLE:
-   opcode(name, code, code, Format.MIMG)
+   opcode(name, code, code, code, Format.MIMG)
  
  IMAGE_GATHER4 = {
     (0x40, "image_gather4"),
@@ -1405,7 +1404,7 @@ IMAGE_GATHER4 = {
  }
  # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
  for (code, name) in IMAGE_GATHER4:
-   opcode(name, code, code, Format.MIMG)
+   opcode(name, code, code, code, Format.MIMG)
  
  
  FLAT = {
@@ -1466,7 +1465,7 @@ FLAT = {
     (0x60,   -1, 0x60, "flat_atomic_fmax_x2"),
  }
  for (gfx7, gfx8, gfx10, name) in FLAT:
-    opcode(name, gfx8, gfx10, Format.FLAT)
+    opcode(name, gfx7, gfx8, gfx10, Format.FLAT)
  
  GLOBAL = {
     #GFX8_9, GFX10
@@ -1526,7 +1525,7 @@ GLOBAL = {
     (  -1, 0x60, "global_atomic_fmax_x2"),
  }
  for (gfx8, gfx10, name) in GLOBAL:
-    opcode(name, gfx8, gfx10, Format.GLOBAL)
+    opcode(name, -1, gfx8, gfx10, Format.GLOBAL)
  
  SCRATCH = {
     #GFX8_9, GFX10
@@ -1554,7 +1553,7 @@ SCRATCH = {
     (0x25, 0x25, "scratch_load_short_d16_hi"),
  }
  for (gfx8, gfx10, name) in SCRATCH:
-    opcode(name, gfx8, gfx10, Format.SCRATCH)
+    opcode(name, -1, gfx8, gfx10, Format.SCRATCH)
  
  # check for duplicate opcode numbers
  for ver in ['gfx9', 'gfx10']:
diff --git a/src/amd/compiler/aco_opcodes_cpp.py b/src/amd/compiler/aco_opcodes_cpp.py

index 83c24e0eb447b3b953d1cb53c1540f83e0c879e2..834da904b8817a64ad573f24e5d72de0386b62cc 100644 (file)
--- a/src/amd/compiler/aco_opcodes_cpp.py
+++ b/src/amd/compiler/aco_opcodes_cpp.py
@@ -28,11 +28,6 @@ template = """\
  
  namespace aco {
  
-const unsigned VOPC_to_GFX6[256] = {
-% for code in VOPC_GFX6:
-    ${code},
-% endfor
-};
  
  <%
  opcode_names = sorted(opcodes.keys())
@@ -41,6 +36,11 @@ can_use_output_modifiers = "".join([opcodes[name].output_mod for name in reverse
  %>
  
  extern const aco::Info instr_info = {
+   .opcode_gfx7 = {
+      % for name in opcode_names:
+      ${opcodes[name].opcode_gfx7},
+      % endfor
+   },
     .opcode_gfx9 = {
        % for name in opcode_names:
        ${opcodes[name].opcode_gfx9},
@@ -68,7 +68,7 @@ extern const aco::Info instr_info = {
  }
  """
  
-from aco_opcodes import opcodes, VOPC_GFX6
+from aco_opcodes import opcodes
  from mako.template import Template
  
-print(Template(template).render(opcodes=opcodes, VOPC_GFX6=VOPC_GFX6))
+print(Template(template).render(opcodes=opcodes))
author	Daniel Schürmann <daniel@schuermann.dev>
	Mon, 4 Nov 2019 17:02:47 +0000 (18:02 +0100)
committer	Daniel Schürmann <daniel@schuermann.dev>
	Sat, 7 Dec 2019 10:23:11 +0000 (11:23 +0100)
src/amd/compiler/aco_assembler.cpp		patch | blob | history
src/amd/compiler/aco_ir.h		patch | blob | history
src/amd/compiler/aco_opcodes.py		patch | blob | history
src/amd/compiler/aco_opcodes_cpp.py		patch | blob | history