From fd1d9474579f8421b04c3641da84b2fc845e9e7c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 26 Sep 2019 17:45:13 +0200 Subject: [PATCH] aco: Add missing GFX10 specific fields and some README notes. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 26 ++++++++++++++++++++++++++ src/amd/compiler/aco_assembler.cpp | 2 ++ src/amd/compiler/aco_ir.h | 7 +++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 87d63c07024..254c5028524 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -69,12 +69,38 @@ situations. The `image_atomic_{swap,cmpswap,add,sub}` opcodes in the GCN3 ISA reference guide are incorrect. The Vega ISA reference guide has the correct ones. +## VINTRP encoding + +VEGA ISA doc says the encoding should be `110010` but `110101` works. + +## VOP1 instructions encoded as VOP3 + +RDNA ISA doc says that `0x140` should be added to the opcode, but that doesn't +work. What works is adding `0x180`, which LLVM also does. + +## FLAT, Scratch, Global instructions + +The NV bit was removed in RDNA, but some parts of the doc still mention it. + ## Legacy instructions Some instructions have a `_LEGACY` variant which implements "DX9 rules", in which the zero "wins" in multiplications, ie. `0.0*x` is always `0.0`. The VEGA ISA mentions `V_MAC_LEGACY_F32` but this instruction is not really there on VEGA. +## RDNA L0, L1 cache and DLC, GLC bits + +The old L1 cache was renamed to L0, and a new L1 cache was added to RDNA. The +L1 cache is 1 cache per shader array. Some instruction encodings have DLC and +GLC bits that interact with the cache. 
+ +* DLC ("device level coherent") bit: controls the L1 cache +* GLC ("globally coherent") bit: controls the L0 cache + +The recommendation from AMD devs is to always set these two bits at the same time, +as it doesn't make too much sense to set them independently, aside from some +circumstances (e.g. we needn't set DLC when only one shader array is used). + # Hardware Bugs ## SMEM corrupts VCCZ on SI/CI diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index a6bf2a3e0db..9423a26997d 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -16,6 +16,8 @@ struct asm_context { asm_context(Program* program) : program(program), chip_class(program->chip_class) { if (chip_class <= GFX9) opcode = &instr_info.opcode_gfx9[0]; + else if (chip_class == GFX10) + opcode = &instr_info.opcode_gfx10[0]; } }; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index ac2bfebbcf0..e14674a2546 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -221,6 +221,7 @@ struct PhysReg { /* helper expressions for special registers */ static constexpr PhysReg m0{124}; static constexpr PhysReg vcc{106}; +static constexpr PhysReg sgpr_null{125}; /* GFX10+ */ static constexpr PhysReg exec{126}; static constexpr PhysReg exec_lo{126}; static constexpr PhysReg exec_hi{127}; @@ -765,6 +766,7 @@ struct MTBUF_instruction : public Instruction { */ struct MIMG_instruction : public Instruction { unsigned dmask; /* Data VGPR enable mask */ + unsigned dim; /* NAVI: dimensionality */ bool unrm; /* Force address to be un-normalized */ bool dlc; /* NAVI: device level coherent */ bool glc; /* globally coherent */ @@ -789,8 +791,9 @@ struct MIMG_instruction : public Instruction { */ struct FLAT_instruction : public Instruction { uint16_t offset; /* Vega only */ - bool slc; - bool glc; + bool slc; /* system level coherent */ + bool glc; /* globally coherent */ + bool dlc; /* NAVI: device level coherent */ bool 
lds; bool nv; }; -- 2.30.2