From 0d42e4d7a02e0b2914d4fa2ad778e049522d263e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 4 Nov 2019 18:02:47 +0100 Subject: [PATCH] aco: Initial GFX7 Support Reviewed-by: Rhys Perry --- src/amd/compiler/aco_assembler.cpp | 47 +++++++++---- src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_opcodes.py | 105 ++++++++++++++-------------- src/amd/compiler/aco_opcodes_cpp.py | 14 ++-- 4 files changed, 95 insertions(+), 72 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 54dc520bb19..bc905f9f969 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -17,7 +17,9 @@ struct asm_context { // TODO: keep track of branch instructions referring blocks // and, when emitting the block, correct the offset in instr asm_context(Program* program) : program(program), chip_class(program->chip_class) { - if (chip_class <= GFX9) + if (chip_class <= GFX7) + opcode = &instr_info.opcode_gfx7[0]; + else if (chip_class <= GFX9) opcode = &instr_info.opcode_gfx9[0]; else if (chip_class == GFX10) opcode = &instr_info.opcode_gfx10[0]; @@ -145,9 +147,26 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* SMEM_instruction* smem = static_cast(instr); bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); bool is_load = !instr->definitions.empty(); - uint32_t encoding = 0; + if (ctx.chip_class <= GFX7) { + encoding = (0b11000 << 27); + encoding |= opcode << 22; + encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0; + encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0; + if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) { + encoding |= instr->operands[1].physReg().reg; + } else { + encoding |= instr->operands[1].constantValue() >> 2; + encoding |= 1 << 8; + } + out.push_back(encoding); + /* SMRD instructions can take a literal on GFX6 & GFX7 */ + if (instr->operands[1].isConstant() && instr->operands[1].constantValue() >= 1024) + out.push_back(instr->operands[1].constantValue() >> 2); + return; + } + if (ctx.chip_class <= GFX9) { encoding = (0b110000 << 26); assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */ @@ -291,7 +310,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= (mubuf->glc ? 1 : 0) << 14; encoding |= (mubuf->idxen ? 1 : 0) << 13; encoding |= (mubuf->offen ? 1 : 0) << 12; - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */ encoding |= (mubuf->slc ? 1 : 0) << 17; } else if (ctx.chip_class >= GFX10) { @@ -326,7 +345,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= 0x0FFF & mtbuf->offset; encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */ - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding |= opcode << 15; } else { encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */ @@ -444,9 +463,9 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* case Format::EXP: { Export_instruction* exp = static_cast(instr); uint32_t encoding; - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding = (0b110001 << 26); - } else if (ctx.chip_class >= GFX10) { + } else { encoding = (0b111110 << 26); } @@ -473,12 +492,10 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if ((uint16_t) instr->format & (uint16_t) Format::VOP2) { opcode = opcode + 0x100; } else if ((uint16_t) instr->format & (uint16_t) Format::VOP1) { - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) opcode = opcode + 0x140; - } else { - /* RDNA ISA doc says this is 0x140, but that doesn't work */ + else opcode = opcode + 0x180; - } } else if ((uint16_t) instr->format & (uint16_t) Format::VOPC) { opcode = opcode + 0x0; } else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP) { @@ -492,8 +509,13 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = (0b110101 << 26); } - encoding |= opcode << 16; - encoding |= (vop3->clamp ? 1 : 0) << 15; + if (ctx.chip_class <= GFX7) { + encoding |= opcode << 17; + encoding |= (vop3->clamp ? 1 : 0) << 11; + } else { + encoding |= opcode << 16; + encoding |= (vop3->clamp ? 1 : 0) << 15; + } for (unsigned i = 0; i < 3; i++) encoding |= vop3->abs[i] << (8+i); for (unsigned i = 0; i < 4; i++) @@ -515,6 +537,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* out.push_back(encoding); } else if (instr->isDPP()){ + assert(ctx.chip_class >= GFX8); /* first emit the instruction without the DPP operand */ Operand dpp_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{250}, v1); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 1f4721f5ffd..10661858ca0 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1259,6 +1259,7 @@ uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs); uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves); typedef struct { + const int16_t opcode_gfx7[static_cast(aco_opcode::num_opcodes)]; const int16_t opcode_gfx9[static_cast(aco_opcode::num_opcodes)]; const int16_t opcode_gfx10[static_cast(aco_opcode::num_opcodes)]; const std::bitset(aco_opcode::num_opcodes)> can_use_input_modifiers; diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index a4b02507eda..f9697420ae0 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -155,7 +155,7 @@ class Opcode(object): """Class that represents all the information we have about the opcode NOTE: this must be kept in sync with aco_op_info """ - def __init__(self, name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod): + def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod): """Parameters: - name is the name of the opcode (prepend nir_op_ for the enum name) @@ -167,6 +167,7 @@ class Opcode(object): constant value of the opcode given the constant values of its inputs. """ assert isinstance(name, str) + assert isinstance(opcode_gfx7, int) assert isinstance(opcode_gfx9, int) assert isinstance(opcode_gfx10, int) assert isinstance(format, Format) @@ -174,6 +175,7 @@ class Opcode(object): assert isinstance(output_mod, bool) self.name = name + self.opcode_gfx7 = opcode_gfx7 self.opcode_gfx9 = opcode_gfx9 self.opcode_gfx10 = opcode_gfx10 self.input_mod = "1" if input_mod else "0" @@ -184,14 +186,11 @@ class Opcode(object): # global dictionary of opcodes opcodes = {} -# VOPC to GFX6 opcode translation map -VOPC_GFX6 = [0] * 256 - -def opcode(name, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False): +def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False): assert name not in opcodes - opcodes[name] = Opcode(name, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod) + opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod) -opcode("exp", 0, 0, format = Format.EXP) +opcode("exp", 0, 0, 0, format = Format.EXP) opcode("p_parallelcopy") opcode("p_startpgm") opcode("p_phi") @@ -302,7 +301,7 @@ SOP2 = { ( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2: - opcode(name, gfx9, gfx10, Format.SOP2) + opcode(name, gfx7, gfx9, gfx10, Format.SOP2) # SOPK instructions: 0 input (+ imm), 1 output + optional scc @@ -338,7 +337,7 @@ SOPK = { ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPK: - opcode(name, gfx9, gfx10, Format.SOPK) + opcode(name, gfx7, gfx9, gfx10, Format.SOPK) # SOP1 instructions: 1 input, 1 output (+optional SCC) @@ -416,7 +415,7 @@ SOP1 = { ( -1, -1, -1, -1, -1, "p_constaddr"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1: - opcode(name, gfx9, gfx10, Format.SOP1) + opcode(name, gfx7, gfx9, gfx10, Format.SOP1) # SOPC instructions: 2 inputs and 0 outputs (+SCC) @@ -444,7 +443,7 @@ SOPC = { ( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC: - opcode(name, gfx9, gfx10, Format.SOPC) + opcode(name, gfx7, gfx9, gfx10, Format.SOPC) # SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs @@ -491,7 +490,7 @@ SOPP = { ( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPP: - opcode(name, gfx9, gfx10, Format.SOPP) + opcode(name, gfx7, gfx9, gfx10, Format.SOPP) # SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output @@ -585,7 +584,7 @@ SMEM = { ( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM: - opcode(name, gfx9, gfx10, Format.SMEM) + opcode(name, gfx7, gfx9, gfx10, Format.SMEM) # VOP2 instructions: 2 inputs, 1 output (+ optional vcc) @@ -663,7 +662,7 @@ VOP2 = { ( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2: - opcode(name, gfx9, gfx10, Format.VOP2, modifiers, modifiers) + opcode(name, gfx7, gfx9, gfx10, Format.VOP2, modifiers, modifiers) # VOP1 instructions: instructions with 1 input and 1 output @@ -763,7 +762,7 @@ VOP1 = { ( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP1: - opcode(name, gfx9, gfx10, Format.VOP1, in_mod, out_mod) + opcode(name, gfx7, gfx9, gfx10, Format.VOP1, in_mod, out_mod) # VOPC instructions: @@ -777,29 +776,29 @@ VOPC_CLASS = { (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in VOPC_CLASS: - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"] for i in range(8): (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) for i in range(16): (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64") - opcode(name, gfx9, gfx10, Format.VOPC, True, False) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False) # GFX_6_7 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32") (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32") @@ -811,41 +810,41 @@ COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"] # GFX_8_9 for i in [0,7]: # only 0 and 7 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) for i in range(1, 7): # [1..6] (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) for i in range(8): (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64") - opcode(name, gfx9, gfx10, Format.VOPC) + opcode(name, gfx7, gfx9, gfx10, Format.VOPC) # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output @@ -876,7 +875,7 @@ VOPP = { # note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name) for (code, name) in VOPP: - opcode(name, code, code, Format.VOP3P) + opcode(name, -1, code, code, Format.VOP3P) # VINTERP instructions: @@ -887,7 +886,7 @@ VINTRP = { } # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) for (code, name) in VINTRP: - opcode(name, code, code, Format.VINTRP) + opcode(name, code, code, code, Format.VINTRP) # VOP3 instructions: 3 inputs, 1 output # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out @@ -1015,7 +1014,7 @@ VOP3 = { # TODO: many 16bit instructions moved from VOP2 to VOP3 on GFX10 } for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3: - opcode(name, gfx9, gfx10, Format.VOP3A, in_mod, out_mod) + opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod) # DS instructions: 3 inputs (1 addr, 2 data), 1 output @@ -1177,7 +1176,7 @@ DS = { ( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS: - opcode(name, gfx9, gfx10, Format.DS) + opcode(name, gfx7, gfx9, gfx10, Format.DS) # MUBUF instructions: MUBUF = { @@ -1262,7 +1261,7 @@ MUBUF = { ( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF: - opcode(name, gfx9, gfx10, Format.MUBUF) + opcode(name, gfx7, gfx9, gfx10, Format.MUBUF) MTBUF = { (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"), @@ -1283,7 +1282,7 @@ MTBUF = { ( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"), } for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF: - opcode(name, gfx9, gfx10, Format.MTBUF) + opcode(name, gfx7, gfx9, gfx10, Format.MTBUF) IMAGE = { @@ -1302,7 +1301,7 @@ IMAGE = { } # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) for (code, name) in IMAGE: - opcode(name, code, code, Format.MIMG) + opcode(name, code, code, code, Format.MIMG) IMAGE_ATOMIC = { (0x0f, 0x0f, 0x10, "image_atomic_swap"), @@ -1326,7 +1325,7 @@ IMAGE_ATOMIC = { # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name) # gfx7 and gfx10 opcodes are the same here for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC: - opcode(name, gfx89, gfx7, Format.MIMG) + opcode(name, gfx7, gfx89, gfx7, Format.MIMG) IMAGE_SAMPLE = { (0x20, "image_sample"), @@ -1372,7 +1371,7 @@ IMAGE_SAMPLE = { } # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) for (code, name) in IMAGE_SAMPLE: - opcode(name, code, code, Format.MIMG) + opcode(name, code, code, code, Format.MIMG) IMAGE_GATHER4 = { (0x40, "image_gather4"), @@ -1405,7 +1404,7 @@ IMAGE_GATHER4 = { } # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) for (code, name) in IMAGE_GATHER4: - opcode(name, code, code, Format.MIMG) + opcode(name, code, code, code, Format.MIMG) FLAT = { @@ -1466,7 +1465,7 @@ FLAT = { (0x60, -1, 0x60, "flat_atomic_fmax_x2"), } for (gfx7, gfx8, gfx10, name) in FLAT: - opcode(name, gfx8, gfx10, Format.FLAT) + opcode(name, gfx7, gfx8, gfx10, Format.FLAT) GLOBAL = { #GFX8_9, GFX10 @@ -1526,7 +1525,7 @@ GLOBAL = { ( -1, 0x60, "global_atomic_fmax_x2"), } for (gfx8, gfx10, name) in GLOBAL: - opcode(name, gfx8, gfx10, Format.GLOBAL) + opcode(name, -1, gfx8, gfx10, Format.GLOBAL) SCRATCH = { #GFX8_9, GFX10 @@ -1554,7 +1553,7 @@ SCRATCH = { (0x25, 0x25, "scratch_load_short_d16_hi"), } for (gfx8, gfx10, name) in SCRATCH: - opcode(name, gfx8, gfx10, Format.SCRATCH) + opcode(name, -1, gfx8, gfx10, Format.SCRATCH) # check for duplicate opcode numbers for ver in ['gfx9', 'gfx10']: diff --git a/src/amd/compiler/aco_opcodes_cpp.py b/src/amd/compiler/aco_opcodes_cpp.py index 83c24e0eb44..834da904b88 100644 --- a/src/amd/compiler/aco_opcodes_cpp.py +++ b/src/amd/compiler/aco_opcodes_cpp.py @@ -28,11 +28,6 @@ template = """\ namespace aco { -const unsigned VOPC_to_GFX6[256] = { -% for code in VOPC_GFX6: - ${code}, -% endfor -}; <% opcode_names = sorted(opcodes.keys()) @@ -41,6 +36,11 @@ can_use_output_modifiers = "".join([opcodes[name].output_mod for name in reverse %> extern const aco::Info instr_info = { + .opcode_gfx7 = { + % for name in opcode_names: + ${opcodes[name].opcode_gfx7}, + % endfor + }, .opcode_gfx9 = { % for name in opcode_names: ${opcodes[name].opcode_gfx9}, @@ -68,7 +68,7 @@ extern const aco::Info instr_info = { } """ -from aco_opcodes import opcodes, VOPC_GFX6 +from aco_opcodes import opcodes from mako.template import Template -print(Template(template).render(opcodes=opcodes, VOPC_GFX6=VOPC_GFX6)) +print(Template(template).render(opcodes=opcodes)) -- 2.30.2