From c0df15e64548b76c09352d5c48013d706b00b3e2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Timur=20Krist=C3=B3f?= <timur.kristof@gmail.com>
Date: Thu, 26 Sep 2019 17:48:08 +0200
Subject: [PATCH] aco: Support GFX10 MTBUF in aco_assembler.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Also remove img_format from aco_ir, since it can be calculated
from dfmt and nfmt. So only the assembler needs to deal with it.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
---
 src/amd/compiler/aco_assembler.cpp | 22 +++++++++++++++++++---
 src/amd/compiler/aco_ir.h          |  9 ++-------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 5136001abcb..41c31aeb8d1 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -2,6 +2,7 @@
 
 #include "aco_ir.h"
 #include "common/sid.h"
+#include "ac_shader_util.h"
 
 namespace aco {
 
@@ -291,16 +292,26 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
    }
    case Format::MTBUF: {
       MTBUF_instruction* mtbuf = static_cast<MTBUF_instruction*>(instr);
+
+      uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
       uint32_t encoding = (0b111010 << 26);
-      encoding |= opcode << 15;
+      assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
+      encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
       encoding |= (mtbuf->glc ? 1 : 0) << 14;
       encoding |= (mtbuf->idxen ? 1 : 0) << 13;
       encoding |= (mtbuf->offen ? 1 : 0) << 12;
       encoding |= 0x0FFF & mtbuf->offset;
-      encoding |= (0xF & mtbuf->dfmt) << 19;
-      encoding |= (0x7 & mtbuf->nfmt) << 23;
+      encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
+
+      if (ctx.chip_class <= GFX9) {
+         encoding |= opcode << 15;
+      } else {
+         encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */
+      }
+
       out.push_back(encoding);
       encoding = 0;
+
       encoding |= instr->operands[2].physReg().reg << 24;
       encoding |= (mtbuf->tfe ? 1 : 0) << 23;
       encoding |= (mtbuf->slc ? 1 : 0) << 22;
@@ -308,6 +319,11 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg().reg : instr->definitions[0].physReg().reg;
       encoding |= (0xFF & reg) << 8;
       encoding |= (0xFF & instr->operands[0].physReg().reg);
+
+      if (ctx.chip_class >= GFX10) {
+         encoding |= ((opcode >> 3) & 0x1) << 21; /* MSB (bit 3) of 4-bit OPCODE goes to bit 21 of the second dword; the old ">> 4" always produced 0 */
+      }
+
       out.push_back(encoding);
       break;
    }
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index e14674a2546..b393ef15283 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -737,13 +737,8 @@ struct MUBUF_instruction : public Instruction {
  *
  */
 struct MTBUF_instruction : public Instruction {
-   union {
-      struct {
-         uint8_t dfmt : 4; /* Data Format of data in memory buffer */
-         uint8_t nfmt : 3; /* Numeric format of data in memory */
-      };
-      uint8_t img_format; /* Buffer or image format as used by GFX10 */
-   };
+   uint8_t dfmt : 4; /* Data Format of data in memory buffer */
+   uint8_t nfmt : 3; /* Numeric format of data in memory */
    unsigned offset; /* Unsigned byte offset - 12 bit */
    bool offen; /* Supply an offset from VGPR (VADDR) */
    bool idxen; /* Supply an index from VGPR (VADDR) */
-- 
2.30.2