2 # Copyright (c) 2018 Valve Corporation
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the "Software"),
6 # to deal in the Software without restriction, including without limitation
7 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 # and/or sell copies of the Software, and to permit persons to whom the
9 # Software is furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice (including the next
12 # paragraph) shall be included in all copies or substantial portions of the
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 # Daniel Schuermann (daniel.schuermann@campus.tu-berlin.de)
27 # Class that represents all the information we have about the opcode
28 # NOTE: this must be kept in sync with aco_op_info
def get_builder_fields(self):
    """Return the format-specific builder fields for this instruction format.

    Each field is a tuple ``(c_type, name, default)`` or
    ``(c_type, name, default, dest)``:

    - ``c_type`` and ``name`` form the parameter declaration,
    - ``default`` is the default value text, or None when the parameter
      has no default,
    - ``dest``, when present, is used by get_builder_field_dests() in
      place of ``name``.

    Formats without extra fields yield an empty list, so the result can
    always be iterated.
    """
    if self == Format.SOPK:
        return [('uint16_t', 'imm', None)]
    if self == Format.SOPP:
        return [('uint32_t', 'block', '-1'),
                ('uint32_t', 'imm', '0')]
    if self == Format.SMEM:
        return [('bool', 'can_reorder', 'true'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'nv', 'false')]
    if self == Format.DS:
        return [('int16_t', 'offset0', '0'),
                ('int8_t', 'offset1', '0'),
                ('bool', 'gds', 'false')]
    if self == Format.MTBUF:
        return [('unsigned', 'dfmt', None),
                ('unsigned', 'nfmt', None),
                ('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'idxen', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false')]
    if self == Format.MUBUF:
        return [('unsigned', 'offset', None),
                ('bool', 'offen', None),
                ('bool', 'swizzled', 'false'),
                ('bool', 'idxen', 'false'),
                ('bool', 'addr64', 'false'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lds', 'false')]
    if self == Format.MIMG:
        # The attribute/component pair that used to follow this return was
        # unreachable; those fields belong to VINTRP below.
        return [('unsigned', 'dmask', '0xF'),
                ('bool', 'da', 'false'),
                ('bool', 'unrm', 'true'),
                ('bool', 'disable_wqm', 'false'),
                ('bool', 'glc', 'false'),
                ('bool', 'dlc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'tfe', 'false'),
                ('bool', 'lwe', 'false'),
                ('bool', 'r128_a16', 'false', 'r128'),
                ('bool', 'd16', 'false')]
    if self == Format.EXP:
        return [('unsigned', 'enabled_mask', None),
                ('unsigned', 'dest', None),
                ('bool', 'compr', 'false', 'compressed'),
                ('bool', 'done', 'false'),
                ('bool', 'vm', 'false', 'valid_mask')]
    if self == Format.PSEUDO_BRANCH:
        return [('uint32_t', 'target0', '0', 'target[0]'),
                ('uint32_t', 'target1', '0', 'target[1]')]
    if self == Format.PSEUDO_REDUCTION:
        return [('ReduceOp', 'op', None, 'reduce_op'),
                ('unsigned', 'cluster_size', '0')]
    if self == Format.VINTRP:
        return [('unsigned', 'attribute', None),
                ('unsigned', 'component', None)]
    if self == Format.DPP:
        return [('uint16_t', 'dpp_ctrl', None),
                ('uint8_t', 'row_mask', '0xF'),
                ('uint8_t', 'bank_mask', '0xF'),
                ('bool', 'bound_ctrl', 'true')]
    if self in (Format.FLAT, Format.GLOBAL, Format.SCRATCH):
        # NOTE(review): this default is the int 0 while every other default
        # is a string; it is kept as-is because the declaration helper
        # formats it with %s.
        return [('uint16_t', 'offset', 0),
                ('bool', 'can_reorder', 'true'),
                ('bool', 'glc', 'false'),
                ('bool', 'slc', 'false'),
                ('bool', 'lds', 'false'),
                ('bool', 'nv', 'false')]
    # No format-specific fields.
    return []
def get_builder_field_names(self):
    """Return the name (second tuple element) of every builder field, in order."""
    names = []
    for field in self.get_builder_fields():
        names.append(field[1])
    return names
def get_builder_field_dests(self):
    """Return the destination name of every builder field, in order.

    A field's destination is its fourth tuple element when it has one,
    otherwise its name (second element).
    """
    dests = []
    for field in self.get_builder_fields():
        has_explicit_dest = len(field) >= 4
        dests.append(field[3] if has_explicit_dest else field[1])
    return dests
def get_builder_field_decls(self):
    """Return one parameter declaration string per builder field.

    A field with a default produces ``"<type> <name>=<default>"``; a field
    whose default is None produces ``"<type> <name>"``.
    """
    decls = []
    for field in self.get_builder_fields():
        c_type, name, default = field[0], field[1], field[2]
        # Identity test: only None means "no default"; falsy defaults such
        # as 0 must still be emitted.
        if default is not None:
            decls.append('%s %s=%s' % (c_type, name, default))
        else:
            decls.append('%s %s' % (c_type, name))
    return decls
def get_builder_initialization(self, num_operands):
    """Return extra initialization statements for a built instruction.

    For Format.SDWA the result selects ``instr->sel[i]`` for at most the
    first two operands, selects ``instr->dst_sel`` from ``def0`` and sets
    ``instr->dst_preserve``. For every other format it is the empty string.

    num_operands: number of operands of the instruction being built.
    """
    # Start from an empty string so non-SDWA formats return '' instead of
    # failing on an unbound local.
    res = ''
    if self == Format.SDWA:
        for i in range(min(num_operands, 2)):
            res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
        # NOTE(review): the destination lines are emitted once per
        # instruction, not once per operand - confirm the nesting.
        res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
        res += 'instr->dst_preserve = true;'
    return res
class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """
    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
        """
        - name is the instruction name (str); its trailing ``_<type>``
          suffixes are parsed to derive definition_size and operand_size
        - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are the integer opcode
          numbers for the respective targets
        - format is the Format member the instruction belongs to
        - input_mod and output_mod are bools, stored as the strings "1"/"0"
        - is_atomic is stored as "1" when truthy and "0" otherwise
        """
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)

        # name and format were validated but never kept; store them so the
        # object carries everything it was constructed from.
        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format

        # Split off up to two trailing '_'-separated components, ignoring an
        # '_e64' marker: the last one names the operand type, the one before
        # it (if any) the definition type.
        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]
        def_dtype = parts[-2] if len(parts) > 1 else parts[-1]

        # Type suffix -> size for every prefix/size combination ('b64', 'f16', ...).
        def_dtype_sizes = {'{}{}'.format(prefix, size) : size for prefix in 'biuf' for size in [64, 32, 24, 16]}
        op_dtype_sizes = dict(def_dtype_sizes)
        # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
        op_dtype_sizes['b16'] = 32
        op_dtype_sizes['i16'] = 32
        op_dtype_sizes['u16'] = 32

        # Unknown suffixes give an operand size of 0; the definition size
        # falls back to the operand size.
        self.operand_size = op_dtype_sizes.get(op_dtype, 0)
        self.definition_size = def_dtype_sizes.get(def_dtype, self.operand_size)

        # exceptions (at most one applies, first match wins)
        # A former first branch (operand_size == 16 with a non-f16 operand
        # type, reassigning 16) was dropped: with the overrides above only
        # 'f16' maps to 16, so it could never be taken.
        if self.operand_size == 24:
            self.operand_size = 32
        elif name in ['s_sext_i32_i8', 's_sext_i32_i16', 'v_msad_u8', 'v_cvt_pk_u16_u32', 'v_cvt_pk_i16_i32']:
            self.operand_size = 32
        elif name in ['v_qsad_pk_u16_u8', 'v_mqsad_pk_u16_u8', 'v_mqsad_u32_u8']:
            self.definition_size = 0
            self.operand_size = 0
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif '_pk_' in name or name in ['v_lerp_u8', 'v_sad_u8', 'v_sad_u16',
                                        'v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                                        'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32
            self.definition_size = 32
        elif '_pknorm_' in name:
            self.definition_size = 32
        elif format == Format.PSEUDO_REDUCTION:
            # 64-bit reductions can have a larger definition size, but get_subdword_definition_info() handles that
            self.definition_size = 32
# global dictionary of opcodes
opcodes = {}

def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = False):
    """Create an Opcode from the arguments and register it in ``opcodes``.

    The new entry is stored under ``name``. Names must be unique: the
    function asserts that ``name`` has not been registered before. The
    opcode numbers default to -1 and the format to Format.PSEUDO.
    """
    assert name not in opcodes, 'duplicate opcode name: %s' % name
    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
# "exp" is the only entry in this group that is given explicit opcode numbers.
opcode("exp", 0, 0, 0, format = Format.EXP)

# The remaining entries are registered by name and format only, in this order.
for _pseudo_name, _pseudo_format in [
    ("p_parallelcopy", Format.PSEUDO),
    ("p_linear_phi", Format.PSEUDO),
    ("p_as_uniform", Format.PSEUDO),
    ("p_create_vector", Format.PSEUDO),
    ("p_extract_vector", Format.PSEUDO),
    ("p_split_vector", Format.PSEUDO),
    # start/end the parts where we can use exec based instructions
    ("p_logical_start", Format.PSEUDO),
    ("p_logical_end", Format.PSEUDO),
    # e.g. subgroupMin() in SPIR-V
    ("p_reduce", Format.PSEUDO_REDUCTION),
    # e.g. subgroupInclusiveMin()
    ("p_inclusive_scan", Format.PSEUDO_REDUCTION),
    # e.g. subgroupExclusiveMin()
    ("p_exclusive_scan", Format.PSEUDO_REDUCTION),
    ("p_branch", Format.PSEUDO_BRANCH),
    ("p_cbranch", Format.PSEUDO_BRANCH),
    ("p_cbranch_z", Format.PSEUDO_BRANCH),
    ("p_cbranch_nz", Format.PSEUDO_BRANCH),
    ("p_memory_barrier_common", Format.PSEUDO_BARRIER), # atomic, buffer, image and shared
    ("p_memory_barrier_atomic", Format.PSEUDO_BARRIER),
    ("p_memory_barrier_buffer", Format.PSEUDO_BARRIER),
    ("p_memory_barrier_image", Format.PSEUDO_BARRIER),
    ("p_memory_barrier_shared", Format.PSEUDO_BARRIER),
    ("p_memory_barrier_gs_data", Format.PSEUDO_BARRIER),
    ("p_memory_barrier_gs_sendmsg", Format.PSEUDO_BARRIER),
    # start/end linear vgprs
    ("p_start_linear_vgpr", Format.PSEUDO),
    ("p_end_linear_vgpr", Format.PSEUDO),
    ("p_discard_if", Format.PSEUDO),
    ("p_load_helper", Format.PSEUDO),
    ("p_demote_to_helper", Format.PSEUDO),
    ("p_is_helper", Format.PSEUDO),
    ("p_exit_early_if", Format.PSEUDO),
    ("p_fs_buffer_store_smem", Format.SMEM),
]:
    opcode(_pseudo_name, format = _pseudo_format)
# Do not leave the loop helpers behind in the module namespace.
del _pseudo_name, _pseudo_format
# simulates proper bpermute behavior when it's unsupported, e.g. GFX10 wave64
295 # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
297 # GFX6, GFX7, GFX8, GFX9, GFX10, name
298 (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
299 (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
300 (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
301 (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
302 (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
303 (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
304 (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"),
305 (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"),
306 (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"),
307 (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"),
308 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"),
309 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"),
310 (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"),
311 (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"),
312 (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"),
313 (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"),
314 (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"),
315 (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"),
316 (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"),
317 (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"),
318 (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"),
319 (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"),
320 (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"),
321 (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"),
322 (0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"),
323 (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"),
324 (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"),
325 (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"),
326 (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"),
327 (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"),
328 (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"),
329 (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"),
330 (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"),
331 (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"),
332 (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"),
333 (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"),
334 (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"),
335 (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"),
336 (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),
337 (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),
338 (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),
339 (0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork"),
340 (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),
341 ( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64"),
342 ( -1, -1, -1, 0x2e, 0x2e, "s_lshl1_add_u32"),
343 ( -1, -1, -1, 0x2f, 0x2f, "s_lshl2_add_u32"),
344 ( -1, -1, -1, 0x30, 0x30, "s_lshl3_add_u32"),
345 ( -1, -1, -1, 0x31, 0x31, "s_lshl4_add_u32"),
346 ( -1, -1, -1, 0x32, 0x32, "s_pack_ll_b32_b16"),
347 ( -1, -1, -1, 0x33, 0x33, "s_pack_lh_b32_b16"),
348 ( -1, -1, -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
349 ( -1, -1, -1, 0x2c, 0x35, "s_mul_hi_u32"),
350 ( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"),
# Register every SOP2 entry; only the GFX7, GFX9 and GFX10 opcode numbers are passed on.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOP2:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SOP2)
356 # SOPK instructions: 0 input (+ imm), 1 output + optional scc
358 # GFX6, GFX7, GFX8, GFX9, GFX10, name
359 (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
360 ( -1, -1, -1, -1, 0x01, "s_version"), # GFX10+
361 (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9
362 (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"),
363 (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"),
364 (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"),
365 (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"),
366 (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"),
367 (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"),
368 (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"),
369 (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"),
370 (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"),
371 (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"),
372 (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"),
373 (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),
374 (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),
375 (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),
376 (0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork"),
377 (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),
378 (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),
379 (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal
380 ( -1, -1, 0x15, 0x15, 0x16, "s_call_b64"),
381 ( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt"),
382 ( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt"),
383 ( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt"),
384 ( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt"),
385 ( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin"),
386 ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end"),
# Register every SOPK entry; only the GFX7, GFX9 and GFX10 opcode numbers are passed on.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOPK:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SOPK)
392 # SOP1 instructions: 1 input, 1 output (+optional SCC)
394 # GFX6, GFX7, GFX8, GFX9, GFX10, name
395 (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"),
396 (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"),
397 (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"),
398 (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"),
399 (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"),
400 (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"),
401 (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"),
402 (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"),
403 (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"),
404 (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"),
405 (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"),
406 (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"),
407 (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"),
408 (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"),
409 (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"),
410 (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"),
411 (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"),
412 (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"),
413 (0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"),
414 (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"),
415 (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"),
416 (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"),
417 (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"),
418 (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"),
419 (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"),
420 (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"),
421 (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),
422 (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),
423 (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),
424 (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64"),
425 (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64"),
426 (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64"),
427 (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),
428 (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"),
429 (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),
430 (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"),
431 (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"),
432 (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"),
433 (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"),
434 (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"),
435 (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"),
436 (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"),
437 (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"),
438 (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),
439 (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),
440 (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),
441 (0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join"),
442 (0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),
443 (0x35, 0x35, -1, -1, 0x35, "s_mov_fed_b32"),
444 ( -1, -1, 0x32, 0x32, -1, "s_set_gpr_idx_idx"),
445 ( -1, -1, -1, 0x33, 0x37, "s_andn1_saveexec_b64"),
446 ( -1, -1, -1, 0x34, 0x38, "s_orn1_saveexec_b64"),
447 ( -1, -1, -1, 0x35, 0x39, "s_andn1_wrexec_b64"),
448 ( -1, -1, -1, 0x36, 0x3a, "s_andn2_wrexec_b64"),
449 ( -1, -1, -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"),
450 ( -1, -1, -1, -1, 0x3c, "s_and_saveexec_b32"),
451 ( -1, -1, -1, -1, 0x3d, "s_or_saveexec_b32"),
452 ( -1, -1, -1, -1, 0x3e, "s_xor_saveexec_b32"),
453 ( -1, -1, -1, -1, 0x3f, "s_andn2_saveexec_b32"),
454 ( -1, -1, -1, -1, 0x40, "s_orn2_saveexec_b32"),
455 ( -1, -1, -1, -1, 0x41, "s_nand_saveexec_b32"),
456 ( -1, -1, -1, -1, 0x42, "s_nor_saveexec_b32"),
457 ( -1, -1, -1, -1, 0x43, "s_xnor_saveexec_b32"),
458 ( -1, -1, -1, -1, 0x44, "s_andn1_saveexec_b32"),
459 ( -1, -1, -1, -1, 0x45, "s_orn1_saveexec_b32"),
460 ( -1, -1, -1, -1, 0x46, "s_andn1_wrexec_b32"),
461 ( -1, -1, -1, -1, 0x47, "s_andn2_wrexec_b32"),
462 ( -1, -1, -1, -1, 0x49, "s_movrelsd_2_b32"),
463 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
464 ( -1, -1, -1, -1, -1, "p_constaddr"),
# Register every SOP1 entry; only the GFX7, GFX9 and GFX10 opcode numbers are passed on.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOP1:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SOP1)
470 # SOPC instructions: 2 inputs and 0 outputs (+SCC)
472 # GFX6, GFX7, GFX8, GFX9, GFX10, name
473 (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
474 (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
475 (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
476 (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
477 (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
478 (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
479 (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
480 (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
481 (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
482 (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
483 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
484 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
485 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
486 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
487 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
488 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
489 (0x10, 0x10, 0x10, 0x10, -1, "s_setvskip"),
490 ( -1, -1, 0x11, 0x11, -1, "s_set_gpr_idx_on"),
491 ( -1, -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"),
492 ( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
# Register every SOPC entry; only the GFX7, GFX9 and GFX10 opcode numbers are passed on.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOPC:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SOPC)
498 # SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
500 # GFX6, GFX7, GFX8, GFX9, GFX10, name
501 (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
502 (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),
503 (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch"),
504 ( -1, -1, 0x03, 0x03, 0x03, "s_wakeup"),
505 (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0"),
506 (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1"),
507 (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz"),
508 (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz"),
509 (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz"),
510 (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz"),
511 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier"),
512 ( -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),
513 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt"),
514 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),
515 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),
516 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),
517 (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg"),
518 (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt"),
519 (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap"),
520 (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),
521 (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),
522 (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),
523 (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),
524 ( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys"),
525 ( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser"),
526 ( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user"),
527 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user"),
528 ( -1, -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),
529 ( -1, -1, 0x1c, 0x1c, -1, "s_set_gpr_idx_off"),
530 ( -1, -1, 0x1d, 0x1d, -1, "s_set_gpr_idx_mode"),
531 ( -1, -1, -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"),
532 ( -1, -1, -1, -1, 0x1f, "s_code_end"),
533 ( -1, -1, -1, -1, 0x20, "s_inst_prefetch"),
534 ( -1, -1, -1, -1, 0x21, "s_clause"),
535 ( -1, -1, -1, -1, 0x22, "s_wait_idle"),
536 ( -1, -1, -1, -1, 0x23, "s_waitcnt_depctr"),
537 ( -1, -1, -1, -1, 0x24, "s_round_mode"),
538 ( -1, -1, -1, -1, 0x25, "s_denorm_mode"),
539 ( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"),
# Register every SOPP entry; only the GFX7, GFX9 and GFX10 opcode numbers are passed on.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOPP:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SOPP)
545 # SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
547 # GFX6, GFX7, GFX8, GFX9, GFX10, name
548 (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"),
549 (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"),
550 (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"),
551 (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"),
552 (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"),
553 ( -1, -1, -1, 0x05, 0x05, "s_scratch_load_dword"),
554 ( -1, -1, -1, 0x06, 0x06, "s_scratch_load_dwordx2"),
555 ( -1, -1, -1, 0x07, 0x07, "s_scratch_load_dwordx4"),
556 (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"),
557 (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"),
558 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"),
559 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"),
560 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"),
561 ( -1, -1, 0x10, 0x10, 0x10, "s_store_dword"),
562 ( -1, -1, 0x11, 0x11, 0x11, "s_store_dwordx2"),
563 ( -1, -1, 0x12, 0x12, 0x12, "s_store_dwordx4"),
564 ( -1, -1, -1, 0x15, 0x15, "s_scratch_store_dword"),
565 ( -1, -1, -1, 0x16, 0x16, "s_scratch_store_dwordx2"),
566 ( -1, -1, -1, 0x17, 0x17, "s_scratch_store_dwordx4"),
567 ( -1, -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"),
568 ( -1, -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"),
569 ( -1, -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"),
570 ( -1, -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"),
571 (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"),
572 ( -1, -1, 0x21, 0x21, 0x21, "s_dcache_wb"),
573 ( -1, 0x1d, 0x22, 0x22, -1, "s_dcache_inv_vol"),
574 ( -1, -1, 0x23, 0x23, -1, "s_dcache_wb_vol"),
575 (0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"),
576 ( -1, -1, 0x25, 0x25, 0x25, "s_memrealtime"),
577 ( -1, -1, 0x26, 0x26, 0x26, "s_atc_probe"),
578 ( -1, -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"),
579 ( -1, -1, -1, 0x28, 0x28, "s_dcache_discard"),
580 ( -1, -1, -1, 0x29, 0x29, "s_dcache_discard_x2"),
581 ( -1, -1, -1, -1, 0x2a, "s_get_waveid_in_workgroup"),
582 ( -1, -1, -1, 0x40, 0x40, "s_buffer_atomic_swap"),
583 ( -1, -1, -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"),
584 ( -1, -1, -1, 0x42, 0x42, "s_buffer_atomic_add"),
585 ( -1, -1, -1, 0x43, 0x43, "s_buffer_atomic_sub"),
586 ( -1, -1, -1, 0x44, 0x44, "s_buffer_atomic_smin"),
587 ( -1, -1, -1, 0x45, 0x45, "s_buffer_atomic_umin"),
588 ( -1, -1, -1, 0x46, 0x46, "s_buffer_atomic_smax"),
589 ( -1, -1, -1, 0x47, 0x47, "s_buffer_atomic_umax"),
590 ( -1, -1, -1, 0x48, 0x48, "s_buffer_atomic_and"),
591 ( -1, -1, -1, 0x49, 0x49, "s_buffer_atomic_or"),
592 ( -1, -1, -1, 0x4a, 0x4a, "s_buffer_atomic_xor"),
593 ( -1, -1, -1, 0x4b, 0x4b, "s_buffer_atomic_inc"),
594 ( -1, -1, -1, 0x4c, 0x4c, "s_buffer_atomic_dec"),
595 ( -1, -1, -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"),
596 ( -1, -1, -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"),
597 ( -1, -1, -1, 0x62, 0x62, "s_buffer_atomic_add_x2"),
598 ( -1, -1, -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"),
599 ( -1, -1, -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"),
600 ( -1, -1, -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"),
601 ( -1, -1, -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"),
602 ( -1, -1, -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"),
603 ( -1, -1, -1, 0x68, 0x68, "s_buffer_atomic_and_x2"),
604 ( -1, -1, -1, 0x69, 0x69, "s_buffer_atomic_or_x2"),
605 ( -1, -1, -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"),
606 ( -1, -1, -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"),
607 ( -1, -1, -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"),
608 ( -1, -1, -1, 0x80, 0x80, "s_atomic_swap"),
609 ( -1, -1, -1, 0x81, 0x81, "s_atomic_cmpswap"),
610 ( -1, -1, -1, 0x82, 0x82, "s_atomic_add"),
611 ( -1, -1, -1, 0x83, 0x83, "s_atomic_sub"),
612 ( -1, -1, -1, 0x84, 0x84, "s_atomic_smin"),
613 ( -1, -1, -1, 0x85, 0x85, "s_atomic_umin"),
614 ( -1, -1, -1, 0x86, 0x86, "s_atomic_smax"),
615 ( -1, -1, -1, 0x87, 0x87, "s_atomic_umax"),
616 ( -1, -1, -1, 0x88, 0x88, "s_atomic_and"),
617 ( -1, -1, -1, 0x89, 0x89, "s_atomic_or"),
618 ( -1, -1, -1, 0x8a, 0x8a, "s_atomic_xor"),
619 ( -1, -1, -1, 0x8b, 0x8b, "s_atomic_inc"),
620 ( -1, -1, -1, 0x8c, 0x8c, "s_atomic_dec"),
621 ( -1, -1, -1, 0xa0, 0xa0, "s_atomic_swap_x2"),
622 ( -1, -1, -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"),
623 ( -1, -1, -1, 0xa2, 0xa2, "s_atomic_add_x2"),
624 ( -1, -1, -1, 0xa3, 0xa3, "s_atomic_sub_x2"),
625 ( -1, -1, -1, 0xa4, 0xa4, "s_atomic_smin_x2"),
626 ( -1, -1, -1, 0xa5, 0xa5, "s_atomic_umin_x2"),
627 ( -1, -1, -1, 0xa6, 0xa6, "s_atomic_smax_x2"),
628 ( -1, -1, -1, 0xa7, 0xa7, "s_atomic_umax_x2"),
629 ( -1, -1, -1, 0xa8, 0xa8, "s_atomic_and_x2"),
630 ( -1, -1, -1, 0xa9, 0xa9, "s_atomic_or_x2"),
631 ( -1, -1, -1, 0xaa, 0xaa, "s_atomic_xor_x2"),
632 ( -1, -1, -1, 0xab, 0xab, "s_atomic_inc_x2"),
633 ( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),
# Register every SMEM entry; an entry is flagged atomic when its name contains "atomic".
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SMEM:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.SMEM, is_atomic="atomic" in name)
639 # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
# TODO: some GFX6_7 opcodes which were shifted to VOP3 in GFX8 are still missing
642 # GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers
643 (0x01, 0x01, -1, -1, -1, "v_readlane_b32", False),
644 (0x02, 0x02, -1, -1, -1, "v_writelane_b32", False),
645 (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
646 (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
647 (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
648 (0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True),
649 (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
650 (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
651 (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
652 (0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),
653 (0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),
654 (0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),
655 (0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True),
656 (0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True),
657 (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),
658 (0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True),
659 (0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False),
660 (0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False),
661 (0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False),
662 (0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False),
663 (0x15, 0x15, -1, -1, -1, "v_lshr_b32", False),
664 (0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False),
665 (0x17, 0x17, -1, -1, -1, "v_ashr_i32", False),
666 (0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False),
667 (0x19, 0x19, -1, -1, -1, "v_lshl_b32", False),
668 (0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False),
669 (0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False),
670 (0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False),
671 (0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False),
672 ( -1, -1, -1, -1, 0x1e, "v_xnor_b32", False),
673 (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True),
674 (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False),
675 (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False),
676 (0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False),
677 (0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA
678 (0x26, 0x26, 0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA
679 (0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA
680 (0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA
681 (0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA
682 (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA
683 ( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True),
684 ( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True),
685 ( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True),
686 ( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
687 ( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
688 ( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
689 ( -1, -1, 0x22, 0x22, 0x35, "v_mul_f16", True),
690 ( -1, -1, 0x23, 0x23, -1, "v_mac_f16", True),
691 ( -1, -1, 0x24, 0x24, -1, "v_madmk_f16", False),
692 ( -1, -1, 0x25, 0x25, -1, "v_madak_f16", False),
693 ( -1, -1, 0x26, 0x26, -1, "v_add_u16", False),
694 ( -1, -1, 0x27, 0x27, -1, "v_sub_u16", False),
695 ( -1, -1, 0x28, 0x28, -1, "v_subrev_u16", False),
696 ( -1, -1, 0x29, 0x29, -1, "v_mul_lo_u16", False),
697 ( -1, -1, 0x2a, 0x2a, -1, "v_lshlrev_b16", False),
698 ( -1, -1, 0x2b, 0x2b, -1, "v_lshrrev_b16", False),
699 ( -1, -1, 0x2c, 0x2c, -1, "v_ashrrev_i16", False),
700 ( -1, -1, 0x2d, 0x2d, 0x39, "v_max_f16", True),
701 ( -1, -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True),
702 ( -1, -1, 0x2f, 0x2f, -1, "v_max_u16", False),
703 ( -1, -1, 0x30, 0x30, -1, "v_max_i16", False),
704 ( -1, -1, 0x31, 0x31, -1, "v_min_u16", False),
705 ( -1, -1, 0x32, 0x32, -1, "v_min_i16", False),
706 ( -1, -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False),
707 ( -1, -1, 0x34, 0x34, 0x25, "v_add_u32", False), # v_add_nc_u32 in RDNA
708 ( -1, -1, 0x35, 0x35, 0x26, "v_sub_u32", False), # v_sub_nc_u32 in RDNA
709 ( -1, -1, 0x36, 0x36, 0x27, "v_subrev_u32", False), # v_subrev_nc_u32 in RDNA
710 ( -1, -1, -1, -1, 0x36, "v_fmac_f16", False),
711 ( -1, -1, -1, -1, 0x37, "v_fmamk_f16", False),
712 ( -1, -1, -1, -1, 0x38, "v_fmaak_f16", False),
713 ( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False),
# Register every VOP2 entry; the single modifiers flag of an entry is used
# for both the input and the output modifiers.
for gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers in VOP2:
    opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.VOP2, input_mod=modifiers, output_mod=modifiers)
# v_cndmask_b32 can use input modifiers but not output modifiers
gfx6, gfx7, gfx8, gfx9, gfx10, name = 0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32"
opcode(name, opcode_gfx7=gfx7, opcode_gfx9=gfx9, opcode_gfx10=gfx10, format=Format.VOP2, input_mod=True, output_mod=False)
724 # VOP1 instructions: instructions with 1 input and 1 output
726 # GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers
727 (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
728 (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
729 (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
730 (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False),
731 (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True),
732 (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
733 (0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
734 (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
735 (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
736 (0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
737 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
738 ( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
739 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
740 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
741 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
742 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
743 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True),
744 (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True),
745 (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
746 (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
747 (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
748 (0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
749 (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False),
750 (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True),
751 ( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True),
752 ( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True),
753 ( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True),
754 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True),
755 ( -1, -1, -1, -1, 0x1b, "v_pipeflush", False, False),
756 (0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),
757 (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True),
758 (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),
759 (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),
760 (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),
761 (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True),
762 (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True),
763 (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True),
764 (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True),
765 (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True),
766 (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True),
767 (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True),
768 (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True),
769 (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True),
770 (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True),
771 (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True),
772 (0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True),
773 (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True),
774 (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True),
775 (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True),
776 (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True),
777 (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True),
778 (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True),
779 (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),
780 (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False),
781 (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),
782 (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),
783 (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),
784 (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False),
785 (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False),
786 (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True),
787 (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),
788 (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),
789 (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),
790 (0x42, 0x42, 0x36, -1, 0x42, "v_movreld_b32", False, False),
791 (0x43, 0x43, 0x37, -1, 0x43, "v_movrels_b32", False, False),
792 (0x44, 0x44, 0x38, -1, 0x44, "v_movrelsd_b32", False, False),
793 ( -1, -1, -1, -1, 0x48, "v_movrelsd_2_b32", False, False),
794 ( -1, -1, -1, 0x37, -1, "v_screen_partition_4se_b32", False, False),
795 ( -1, -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True),
796 ( -1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),
797 ( -1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),
798 ( -1, -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),
799 ( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True),
800 ( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True),
801 ( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True),
802 ( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True),
803 ( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True),
804 ( -1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),
805 ( -1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),
806 ( -1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),
807 ( -1, -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True),
808 ( -1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True),
809 ( -1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),
810 ( -1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),
811 ( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True),
812 ( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True),
813 ( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True),
814 ( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True),
815 ( -1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),
816 ( -1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),
817 ( -1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),
818 ( -1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False),
819 ( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False),
# Register every VOP1 table row as an opcode.
# Each row supplies separate input- and output-modifier flags. Only the gfx7,
# gfx9 and gfx10 opcode columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod in VOP1:
    opcode(
        name,
        gfx7,
        gfx9,
        gfx10,
        Format.VOP1,
        in_mod,
        out_mod,
    )
828 (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),
829 ( -1, -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
830 (0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
831 ( -1, -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
832 (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64"),
833 (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64"),
# Register the v_cmp_class_* / v_cmpx_class_* rows as VOPC opcodes.
# All of them are registered with input modifiers enabled (True) and output
# modifiers disabled (False).
for gfx6, gfx7, gfx8, gfx9, gfx10, name in VOPC_CLASS:
    opcode(
        name,
        gfx7,
        gfx9,
        gfx10,
        Format.VOPC,
        True,
        False,
    )
# Condition suffixes of the floating-point compare opcodes. Entries are looked
# up by index when the v_cmp_*/v_cmpx_* instruction names are built.
COMPF = [
    "f", "lt", "eq", "le", "gt", "lg", "ge", "o",
    "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru",
]
841 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (-1, -1, 0x20+i
, 0x20+i
, 0xc8+i
, "v_cmp_"+COMPF
[i
]+"_f16")
842 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
843 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (-1, -1, 0x30+i
, 0x30+i
, 0xd8+i
, "v_cmpx_"+COMPF
[i
]+"_f16")
844 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
845 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (-1, -1, 0x28+i
, 0x28+i
, 0xe8+i
, "v_cmp_"+COMPF
[i
+8]+"_f16")
846 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
847 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (-1, -1, 0x38+i
, 0x38+i
, 0xf8+i
, "v_cmpx_"+COMPF
[i
+8]+"_f16")
848 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
851 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x00+i
, 0x00+i
, 0x40+i
, 0x40+i
, 0x00+i
, "v_cmp_"+COMPF
[i
]+"_f32")
852 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
853 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x10+i
, 0x10+i
, 0x50+i
, 0x50+i
, 0x10+i
, "v_cmpx_"+COMPF
[i
]+"_f32")
854 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
855 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x20+i
, 0x20+i
, 0x60+i
, 0x60+i
, 0x20+i
, "v_cmp_"+COMPF
[i
]+"_f64")
856 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
857 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x30+i
, 0x30+i
, 0x70+i
, 0x70+i
, 0x30+i
, "v_cmpx_"+COMPF
[i
]+"_f64")
858 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
, True, False)
860 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x40+i
, 0x40+i
, -1, -1, -1, "v_cmps_"+COMPF
[i
]+"_f32")
861 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x50+i
, 0x50+i
, -1, -1, -1, "v_cmpsx_"+COMPF
[i
]+"_f32")
862 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x60+i
, 0x60+i
, -1, -1, -1, "v_cmps_"+COMPF
[i
]+"_f64")
863 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x70+i
, 0x70+i
, -1, -1, -1, "v_cmpsx_"+COMPF
[i
]+"_f64")
# Condition suffixes of the integer compare opcodes. Entries are looked up by
# index when the v_cmp_*/v_cmpx_* instruction names are built.
COMPI = [
    "f", "lt", "eq", "le",
    "gt", "lg", "ge", "tru",
]
# 16-bit integer compares for the "f" (index 0) and "tru" (index 7) conditions
# only. These rows have a GFX8/GFX9 opcode but -1 in the GFX10 column.
# Only gfx7, gfx9 and gfx10 are forwarded to opcode().
for i in (0, 7):  # only 0 and 7
    # signed compare
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xa0 + i, 0xa0 + i, -1
    name = "v_cmp_%s_i16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # signed compare, "x" variant
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xb0 + i, 0xb0 + i, -1
    name = "v_cmpx_%s_i16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # unsigned compare
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xa8 + i, 0xa8 + i, -1
    name = "v_cmp_%s_u16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # unsigned compare, "x" variant
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xb8 + i, 0xb8 + i, -1
    name = "v_cmpx_%s_u16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
# 16-bit integer compares for conditions 1..6 ("lt" .. "ge"). Unlike the "f"
# and "tru" conditions, these also have a GFX10 opcode.
# Only gfx7, gfx9 and gfx10 are forwarded to opcode().
for i in range(1, 7):  # [1..6]
    # signed compare
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xa0 + i, 0xa0 + i, 0x88 + i
    name = "v_cmp_%s_i16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # signed compare, "x" variant
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xb0 + i, 0xb0 + i, 0x98 + i
    name = "v_cmpx_%s_i16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # unsigned compare
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xa8 + i, 0xa8 + i, 0xa8 + i
    name = "v_cmp_%s_u16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)

    # unsigned compare, "x" variant
    gfx6, gfx7, gfx8, gfx9, gfx10 = -1, -1, 0xb8 + i, 0xb8 + i, 0xb8 + i
    name = "v_cmpx_%s_u16" % COMPI[i]
    opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
889 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x80+i
, 0x80+i
, 0xc0+i
, 0xc0+i
, 0x80+i
, "v_cmp_"+COMPI
[i
]+"_i32")
890 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
891 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0x90+i
, 0x90+i
, 0xd0+i
, 0xd0+i
, 0x90+i
, "v_cmpx_"+COMPI
[i
]+"_i32")
892 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
893 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xa0+i
, 0xa0+i
, 0xe0+i
, 0xe0+i
, 0xa0+i
, "v_cmp_"+COMPI
[i
]+"_i64")
894 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
895 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xb0+i
, 0xb0+i
, 0xf0+i
, 0xf0+i
, 0xb0+i
, "v_cmpx_"+COMPI
[i
]+"_i64")
896 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
897 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xc0+i
, 0xc0+i
, 0xc8+i
, 0xc8+i
, 0xc0+i
, "v_cmp_"+COMPI
[i
]+"_u32")
898 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
899 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xd0+i
, 0xd0+i
, 0xd8+i
, 0xd8+i
, 0xd0+i
, "v_cmpx_"+COMPI
[i
]+"_u32")
900 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
901 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xe0+i
, 0xe0+i
, 0xe8+i
, 0xe8+i
, 0xe0+i
, "v_cmp_"+COMPI
[i
]+"_u64")
902 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
903 (gfx6
, gfx7
, gfx8
, gfx9
, gfx10
, name
) = (0xf0+i
, 0xf0+i
, 0xf8+i
, 0xf8+i
, 0xf0+i
, "v_cmpx_"+COMPI
[i
]+"_u64")
904 opcode(name
, gfx7
, gfx9
, gfx10
, Format
.VOPC
)
907 # VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
909 (0x00, "v_pk_mad_i16"),
910 (0x01, "v_pk_mul_lo_u16"),
911 (0x02, "v_pk_add_i16"),
912 (0x03, "v_pk_sub_i16"),
913 (0x04, "v_pk_lshlrev_b16"),
914 (0x05, "v_pk_lshrrev_b16"),
915 (0x06, "v_pk_ashrrev_i16"),
916 (0x07, "v_pk_max_i16"),
917 (0x08, "v_pk_min_i16"),
918 (0x09, "v_pk_mad_u16"),
919 (0x0a, "v_pk_add_u16"),
920 (0x0b, "v_pk_sub_u16"),
921 (0x0c, "v_pk_max_u16"),
922 (0x0d, "v_pk_min_u16"),
923 (0x0e, "v_pk_fma_f16"),
924 (0x0f, "v_pk_add_f16"),
925 (0x10, "v_pk_mul_f16"),
926 (0x11, "v_pk_min_f16"),
927 (0x12, "v_pk_max_f16"),
928 (0x20, "v_pk_fma_mix_f32"), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
929 (0x21, "v_pk_fma_mixlo_f16"), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
930 (0x22, "v_pk_fma_mixhi_f16"), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
# Note that these are only supported on gfx9+, so we'll need to distinguish
# between gfx8 and gfx9 here.
# Equivalent row layout:
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
for code, name in VOPP:
    # -1 in the gfx7 position; the same code is used for gfx9 and gfx10.
    opcode(
        name,
        -1,
        code,
        code,
        Format.VOP3P,
    )
938 # VINTERP instructions:
940 (0x00, "v_interp_p1_f32"),
941 (0x01, "v_interp_p2_f32"),
942 (0x02, "v_interp_mov_f32"),
# The interpolation opcodes use the same code in every opcode column.
# Equivalent row layout:
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for code, name in VINTRP:
    opcode(
        name,
        code,
        code,
        code,
        Format.VINTRP,
    )
948 # VOP3 instructions: 3 inputs, 1 output
949 # VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
951 (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True),
952 (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True),
953 (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False),
954 (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False),
955 (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True),
956 (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True),
957 (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True),
958 (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True),
959 (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),
960 (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),
961 (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),
962 (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True),
963 (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True),
964 (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),
965 (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),
966 (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),
967 (0x150, 0x150, -1, -1, 0x150, "v_mullit_f32", True, True),
968 (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True),
969 (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False),
970 (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False),
971 (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True),
972 (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False),
973 (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False),
974 (0x157, 0x157, 0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True),
975 (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False),
976 (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False),
977 (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False),
978 (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False),
979 (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False),
980 (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False),
981 (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),
982 (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),
983 (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),
984 (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False),
985 (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False),
986 (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False),
987 (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True),
988 (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True),
989 (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True),
990 (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True),
991 (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True), # src1 can take input modifiers
992 (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False),
993 (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False),
994 (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False), # identical to v_mul_lo_u32
995 (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False),
996 (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC
997 (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True), # writes to VCC
998 (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input
999 (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True), # takes VCC input
1000 (0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),
1001 (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),
1002 (0x172, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference?
1003 (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),
1004 (0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference?
1005 (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False),
1006 ( -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),
1007 ( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False),
1008 ( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False),
1009 ( -1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True),
1010 ( -1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False),
1011 ( -1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False),
1012 ( -1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),
1013 ( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True),
1014 ( -1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True),
1015 (0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False),
1016 ( -1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),
1017 ( -1, -1, -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False),
1018 ( -1, -1, -1, 0x1f3, 0x345, "v_xad_u32", False, False),
1019 ( -1, -1, -1, 0x1f4, 0x351, "v_min3_f16", True, True),
1020 ( -1, -1, -1, 0x1f5, 0x352, "v_min3_i16", False, False),
1021 ( -1, -1, -1, 0x1f6, 0x353, "v_min3_u16", False, False),
1022 ( -1, -1, -1, 0x1f7, 0x354, "v_max3_f16", True, True),
1023 ( -1, -1, -1, 0x1f8, 0x355, "v_max3_i16", False, False),
1024 ( -1, -1, -1, 0x1f9, 0x356, "v_max3_u16", False, False),
1025 ( -1, -1, -1, 0x1fa, 0x357, "v_med3_f16", True, True),
1026 ( -1, -1, -1, 0x1fb, 0x358, "v_med3_i16", False, False),
1027 ( -1, -1, -1, 0x1fc, 0x359, "v_med3_u16", False, False),
1028 ( -1, -1, -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False),
1029 ( -1, -1, -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False),
1030 ( -1, -1, -1, 0x1ff, 0x36d, "v_add3_u32", False, False),
1031 ( -1, -1, -1, 0x200, 0x36f, "v_lshl_or_b32", False, False),
1032 ( -1, -1, -1, 0x201, 0x371, "v_and_or_b32", False, False),
1033 ( -1, -1, -1, 0x202, 0x372, "v_or3_b32", False, False),
1034 ( -1, -1, -1, 0x203, -1, "v_mad_f16", True, True),
1035 ( -1, -1, -1, 0x204, 0x340, "v_mad_u16", False, False),
1036 ( -1, -1, -1, 0x205, 0x35e, "v_mad_i16", False, False),
1037 ( -1, -1, -1, 0x206, 0x34b, "v_fma_f16", True, True),
1038 ( -1, -1, -1, 0x207, 0x35f, "v_div_fixup_f16", True, True),
1039 ( -1, -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True),
1040 ( -1, -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True),
1041 ( -1, -1, 0x276, 0x276, -1, "v_interp_p2_legacy_f16", True, True),
1042 ( -1, -1, -1, 0x277, 0x35a, "v_interp_p2_f16", True, True),
1043 (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True),
1044 ( -1, -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False),
1045 ( -1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False),
1046 (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),
1047 (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),
1048 ( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False),
1049 ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False),
1050 ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False),
1051 ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False),
1052 (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
1053 (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
1054 (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
1055 (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
1056 (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
1057 (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
1058 ( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),
1059 ( -1, -1, -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False),
1060 ( -1, -1, -1, 0x29c, 0x37f, "v_add_i32", False, False),
1061 ( -1, -1, -1, 0x29d, 0x376, "v_sub_i32", False, False),
1062 ( -1, -1, -1, 0x29e, 0x30d, "v_add_i16", False, False),
1063 ( -1, -1, -1, 0x29f, 0x30e, "v_sub_i16", False, False),
1064 ( -1, -1, -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False),
1065 ( -1, -1, -1, -1, 0x178, "v_xor3_b32", False, False),
1066 ( -1, -1, -1, -1, 0x377, "v_permlane16_b32", False, False),
1067 ( -1, -1, -1, -1, 0x378, "v_permlanex16_b32", False, False),
1068 ( -1, -1, -1, -1, 0x30f, "v_add_co_u32_e64", False, False),
1069 ( -1, -1, -1, -1, 0x310, "v_sub_co_u32_e64", False, False),
1070 ( -1, -1, -1, -1, 0x319, "v_subrev_co_u32_e64", False, False),
1071 ( -1, -1, -1, -1, 0x303, "v_add_u16_e64", False, False),
1072 ( -1, -1, -1, -1, 0x304, "v_sub_u16_e64", False, False),
1073 ( -1, -1, -1, -1, 0x305, "v_mul_lo_u16_e64", False, False),
1074 ( -1, -1, -1, -1, 0x309, "v_max_u16_e64", False, False),
1075 ( -1, -1, -1, -1, 0x30a, "v_max_i16_e64", False, False),
1076 ( -1, -1, -1, -1, 0x30b, "v_min_u16_e64", False, False),
1077 ( -1, -1, -1, -1, 0x30c, "v_min_i16_e64", False, False),
1078 ( -1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, False),
1079 ( -1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False),
1080 ( -1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False),
# Register every VOP3 table row as an opcode with Format.VOP3A.
# Each row supplies separate input- and output-modifier flags. Only the gfx7,
# gfx9 and gfx10 opcode columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod in VOP3:
    opcode(
        name,
        gfx7,
        gfx9,
        gfx10,
        Format.VOP3A,
        in_mod,
        out_mod,
    )
1086 # DS instructions: 3 inputs (1 addr, 2 data), 1 output
1088 (0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
1089 (0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
1090 (0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
1091 (0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
1092 (0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
1093 (0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
1094 (0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
1095 (0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
1096 (0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
1097 (0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
1098 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
1099 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
1100 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
1101 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"),
1102 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"),
1103 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"),
1104 (0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"),
1105 (0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"),
1106 (0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
1107 (0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
1108 ( -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
1109 ( -1, -1, 0x15, 0x15, 0x15, "ds_add_f32"),
1110 ( -1, -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"),
1111 (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"),
1112 (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"),
1113 (0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
1114 (0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
1115 (0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
1116 (0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
1117 (0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
1118 (0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
1119 (0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
1120 (0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
1121 (0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
1122 (0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
1123 (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
1124 (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
1125 (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
1126 (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"),
1127 (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"),
1128 (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"),
1129 (0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"),
1130 (0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"),
1131 (0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
1132 (0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
1133 ( -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
1134 ( -1, -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"),
1135 (0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"),
1136 (0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"),
1137 (0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"),
1138 (0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"),
1139 (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"),
1140 (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"),
1141 (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"),
1142 (0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
1143 ( -1, -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"),
1144 ( -1, -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"),
1145 (0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
1146 (0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
1147 (0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
1148 (0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
1149 (0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
1150 (0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
1151 (0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
1152 (0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
1153 (0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
1154 (0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
1155 (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
1156 (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
1157 (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
1158 (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"),
1159 (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"),
1160 (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"),
1161 (0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"),
1162 (0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),
1163 (0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
1164 (0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
1165 ( -1, -1, 0x54, 0x54, 0xa0, "ds_write_b8_d16_hi"),
1166 ( -1, -1, 0x55, 0x55, 0xa1, "ds_write_b16_d16_hi"),
1167 ( -1, -1, 0x56, 0x56, 0xa2, "ds_read_u8_d16"),
1168 ( -1, -1, 0x57, 0x57, 0xa3, "ds_read_u8_d16_hi"),
1169 ( -1, -1, 0x58, 0x58, 0xa4, "ds_read_i8_d16"),
1170 ( -1, -1, 0x59, 0x59, 0xa5, "ds_read_i8_d16_hi"),
1171 ( -1, -1, 0x5a, 0x5a, 0xa6, "ds_read_u16_d16"),
1172 ( -1, -1, 0x5b, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
1173 (0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
1174 (0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
1175 (0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
1176 (0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
1177 (0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
1178 (0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
1179 (0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
1180 (0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
1181 (0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
1182 (0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
1183 (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
1184 (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
1185 (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
1186 (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"),
1187 (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"),
1188 (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"),
1189 (0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"),
1190 (0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"),
1191 (0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
1192 (0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
1193 (0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"),
1194 (0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"),
1195 (0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"),
1196 ( -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
1197 (0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"),
1198 (0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"),
1199 (0x82, 0x82, 0x82, 0x82, 0x82, "ds_rsub_src2_u32"),
1200 (0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"),
1201 (0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"),
1202 (0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"),
1203 (0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"),
1204 (0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"),
1205 (0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"),
1206 (0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"),
1207 (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"),
1208 (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"),
1209 (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"),
1210 (0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"),
1211 (0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"),
1212 ( -1, -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"),
1213 ( -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"),
1214 (0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"),
1215 (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"),
1216 (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"),
1217 (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"),
1218 (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"),
1219 ( -1, -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"),
1220 (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"),
1221 (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"),
1222 (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"),
1223 (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"),
1224 (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"),
1225 (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"),
1226 (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"),
1227 (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"),
1228 (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"),
1229 (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"),
1230 (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"),
1231 (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"),
1232 (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"),
1233 (0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"),
1234 (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"),
1235 (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"),
1236 (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"),
1237 (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"),
1238 ( -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"),
1239 ( -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"),
1240 ( -1, 0xfd, 0xfd, -1, -1, "ds_condxchg32_rtn_b128"),
1241 ( -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"),
1242 ( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
# Register every DS table row as an opcode.
# DS rows carry no modifier flags. Only the gfx7, gfx9 and gfx10 opcode
# columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name in DS:
    opcode(
        name,
        gfx7,
        gfx9,
        gfx10,
        Format.DS,
    )
1247 # MUBUF instructions:
1249 (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
1250 (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
1251 (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
1252 (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
1253 (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
1254 (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
1255 (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
1256 (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
1257 ( -1, -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"),
1258 ( -1, -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"),
1259 ( -1, -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"),
1260 ( -1, -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"),
1261 ( -1, -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"),
1262 ( -1, -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"),
1263 ( -1, -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"),
1264 ( -1, -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"),
1265 (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"),
1266 (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"),
1267 (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"),
1268 (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"),
1269 (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"),
1270 (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"),
1271 ( -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"),
1272 (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"),
1273 (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
1274 ( -1, -1, -1, 0x19, 0x19, "buffer_store_byte_d16_hi"),
1275 (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"),
1276 ( -1, -1, -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"),
1277 (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"),
1278 (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"),
1279 ( -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"),
1280 (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"),
1281 ( -1, -1, -1, 0x20, 0x20, "buffer_load_ubyte_d16"),
1282 ( -1, -1, -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
1283 ( -1, -1, -1, 0x22, 0x22, "buffer_load_sbyte_d16"),
1284 ( -1, -1, -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"),
1285 ( -1, -1, -1, 0x24, 0x24, "buffer_load_short_d16"),
1286 ( -1, -1, -1, 0x25, 0x25, "buffer_load_short_d16_hi"),
1287 ( -1, -1, -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
1288 ( -1, -1, -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
1289 ( -1, -1, 0x3d, 0x3d, -1, "buffer_store_lds_dword"),
1290 (0x71, 0x71, 0x3e, 0x3e, -1, "buffer_wbinvl1"),
1291 (0x70, 0x70, 0x3f, 0x3f, -1, "buffer_wbinvl1_vol"),
1292 (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"),
1293 (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"),
1294 (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"),
1295 (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"),
1296 (0x34, -1, -1, -1, -1, "buffer_atomic_rsub"),
1297 (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"),
1298 (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"),
1299 (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"),
1300 (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"),
1301 (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"),
1302 (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"),
1303 (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"),
1304 (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"),
1305 (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"),
1306 (0x3e, 0x3e, -1, -1, 0x3e, "buffer_atomic_fcmpswap"),
1307 (0x3f, 0x3f, -1, -1, 0x3f, "buffer_atomic_fmin"),
1308 (0x40, 0x40, -1, -1, 0x40, "buffer_atomic_fmax"),
1309 (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"),
1310 (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"),
1311 (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"),
1312 (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"),
1313 (0x54, -1, -1, -1, -1, "buffer_atomic_rsub_x2"),
1314 (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"),
1315 (0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"),
1316 (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"),
1317 (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"),
1318 (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"),
1319 (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"),
1320 (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"),
1321 (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"),
1322 (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"),
1323 (0x5e, 0x5e, -1, -1, 0x5e, "buffer_atomic_fcmpswap_x2"),
1324 (0x5f, 0x5f, -1, -1, 0x5f, "buffer_atomic_fmin_x2"),
1325 (0x60, 0x60, -1, -1, 0x60, "buffer_atomic_fmax_x2"),
1326 ( -1, -1, -1, -1, 0x71, "buffer_gl0_inv"),
1327 ( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"),
# Hand every MUBUF table row to opcode(). A row is flagged as atomic when its
# mnemonic contains the substring "atomic". The gfx6 and gfx8 columns are
# unpacked but not forwarded.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in MUBUF:
    opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, is_atomic=("atomic" in name))
1333 (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
1334 (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
1335 (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
1336 (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
1337 (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
1338 (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
1339 (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
1340 (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
1341 ( -1, -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
1342 ( -1, -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
1343 ( -1, -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
1344 ( -1, -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
1345 ( -1, -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
1346 ( -1, -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
1347 ( -1, -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
1348 ( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
# Hand every MTBUF table row to opcode(). Only the gfx7, gfx9 and gfx10
# columns are forwarded; gfx6 and gfx8 are unpacked but not used by the call.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in MTBUF:
    opcode(name, gfx7, gfx9, gfx10, Format.MTBUF)
1355 (0x00, "image_load"),
1356 (0x01, "image_load_mip"),
1357 (0x02, "image_load_pck"),
1358 (0x03, "image_load_pck_sgn"),
1359 (0x04, "image_load_mip_pck"),
1360 (0x05, "image_load_mip_pck_sgn"),
1361 (0x08, "image_store"),
1362 (0x09, "image_store_mip"),
1363 (0x0a, "image_store_pck"),
1364 (0x0b, "image_store_mip_pck"),
1365 (0x0e, "image_get_resinfo"),
1366 (0x60, "image_get_lod"),
# IMAGE rows are (code, name): one encoding shared by every hardware
# generation, so the same code fills all three opcode arguments of opcode().
for code, name in IMAGE:
    opcode(name, code, code, code, Format.MIMG)
1373 (0x0f, 0x0f, 0x10, "image_atomic_swap"),
1374 (0x10, 0x10, 0x11, "image_atomic_cmpswap"),
1375 (0x11, 0x11, 0x12, "image_atomic_add"),
1376 (0x12, 0x12, 0x13, "image_atomic_sub"),
1377 (0x13, -1, -1, "image_atomic_rsub"),
1378 (0x14, 0x14, 0x14, "image_atomic_smin"),
1379 (0x15, 0x15, 0x15, "image_atomic_umin"),
1380 (0x16, 0x16, 0x16, "image_atomic_smax"),
1381 (0x17, 0x17, 0x17, "image_atomic_umax"),
1382 (0x18, 0x18, 0x18, "image_atomic_and"),
1383 (0x19, 0x19, 0x19, "image_atomic_or"),
1384 (0x1a, 0x1a, 0x1a, "image_atomic_xor"),
1385 (0x1b, 0x1b, 0x1b, "image_atomic_inc"),
1386 (0x1c, 0x1c, 0x1c, "image_atomic_dec"),
1387 (0x1d, 0x1d, -1, "image_atomic_fcmpswap"),
1388 (0x1e, 0x1e, -1, "image_atomic_fmin"),
1389 (0x1f, 0x1f, -1, "image_atomic_fmax"),
# IMAGE_ATOMIC rows are (gfx6, gfx7, gfx89, name); gfx89 is the single value
# shared by gfx8 and gfx9. The gfx7 and gfx10 opcodes are the same here, so
# the gfx7 column is passed twice (second and fourth argument). The gfx6
# column is unpacked but not forwarded. Every row is marked atomic.
for gfx6, gfx7, gfx89, name in IMAGE_ATOMIC:
    opcode(name, gfx7, gfx89, gfx7, Format.MIMG, is_atomic=True)
1397 (0x20, "image_sample"),
1398 (0x21, "image_sample_cl"),
1399 (0x22, "image_sample_d"),
1400 (0x23, "image_sample_d_cl"),
1401 (0x24, "image_sample_l"),
1402 (0x25, "image_sample_b"),
1403 (0x26, "image_sample_b_cl"),
1404 (0x27, "image_sample_lz"),
1405 (0x28, "image_sample_c"),
1406 (0x29, "image_sample_c_cl"),
1407 (0x2a, "image_sample_c_d"),
1408 (0x2b, "image_sample_c_d_cl"),
1409 (0x2c, "image_sample_c_l"),
1410 (0x2d, "image_sample_c_b"),
1411 (0x2e, "image_sample_c_b_cl"),
1412 (0x2f, "image_sample_c_lz"),
1413 (0x30, "image_sample_o"),
1414 (0x31, "image_sample_cl_o"),
1415 (0x32, "image_sample_d_o"),
1416 (0x33, "image_sample_d_cl_o"),
1417 (0x34, "image_sample_l_o"),
1418 (0x35, "image_sample_b_o"),
1419 (0x36, "image_sample_b_cl_o"),
1420 (0x37, "image_sample_lz_o"),
1421 (0x38, "image_sample_c_o"),
1422 (0x39, "image_sample_c_cl_o"),
1423 (0x3a, "image_sample_c_d_o"),
1424 (0x3b, "image_sample_c_d_cl_o"),
1425 (0x3c, "image_sample_c_l_o"),
1426 (0x3d, "image_sample_c_b_o"),
1427 (0x3e, "image_sample_c_b_cl_o"),
1428 (0x3f, "image_sample_c_lz_o"),
1429 (0x68, "image_sample_cd"),
1430 (0x69, "image_sample_cd_cl"),
1431 (0x6a, "image_sample_c_cd"),
1432 (0x6b, "image_sample_c_cd_cl"),
1433 (0x6c, "image_sample_cd_o"),
1434 (0x6d, "image_sample_cd_cl_o"),
1435 (0x6e, "image_sample_c_cd_o"),
1436 (0x6f, "image_sample_c_cd_cl_o"),
# IMAGE_SAMPLE rows are (code, name): one encoding shared by every hardware
# generation, so the same code fills all three opcode arguments of opcode().
for code, name in IMAGE_SAMPLE:
    opcode(name, code, code, code, Format.MIMG)
1443 (0x40, "image_gather4"),
1444 (0x41, "image_gather4_cl"),
1445 #(0x42, "image_gather4h"), VEGA only?
1446 (0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.
1447 (0x45, "image_gather4_b"),
1448 (0x46, "image_gather4_b_cl"),
1449 (0x47, "image_gather4_lz"),
1450 (0x48, "image_gather4_c"),
1451 (0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.
1452 #(0x4a, "image_gather4h_pck"), VEGA only?
1453 #(0x4b, "image_gather8h_pck"), VEGA only?
1454 (0x4c, "image_gather4_c_l"),
1455 (0x4d, "image_gather4_c_b"),
1456 (0x4e, "image_gather4_c_b_cl"),
1457 (0x4f, "image_gather4_c_lz"),
1458 (0x50, "image_gather4_o"),
1459 (0x51, "image_gather4_cl_o"),
1460 (0x54, "image_gather4_l_o"),
1461 (0x55, "image_gather4_b_o"),
1462 (0x56, "image_gather4_b_cl_o"),
1463 (0x57, "image_gather4_lz_o"),
1464 (0x58, "image_gather4_c_o"),
1465 (0x59, "image_gather4_c_cl_o"),
1466 (0x5c, "image_gather4_c_l_o"),
1467 (0x5d, "image_gather4_c_b_o"),
1468 (0x5e, "image_gather4_c_b_cl_o"),
1469 (0x5f, "image_gather4_c_lz_o"),
# IMAGE_GATHER4 rows are (code, name): one encoding shared by every hardware
# generation, so the same code fills all three opcode arguments of opcode().
for code, name in IMAGE_GATHER4:
    opcode(name, code, code, code, Format.MIMG)
1477 #GFX7, GFX8_9, GFX10
1478 (0x08, 0x10, 0x08, "flat_load_ubyte"),
1479 (0x09, 0x11, 0x09, "flat_load_sbyte"),
1480 (0x0a, 0x12, 0x0a, "flat_load_ushort"),
1481 (0x0b, 0x13, 0x0b, "flat_load_sshort"),
1482 (0x0c, 0x14, 0x0c, "flat_load_dword"),
1483 (0x0d, 0x15, 0x0d, "flat_load_dwordx2"),
1484 (0x0f, 0x16, 0x0f, "flat_load_dwordx3"),
1485 (0x0e, 0x17, 0x0e, "flat_load_dwordx4"),
1486 (0x18, 0x18, 0x18, "flat_store_byte"),
1487 ( -1, 0x19, 0x19, "flat_store_byte_d16_hi"),
1488 (0x1a, 0x1a, 0x1a, "flat_store_short"),
1489 ( -1, 0x1b, 0x1b, "flat_store_short_d16_hi"),
1490 (0x1c, 0x1c, 0x1c, "flat_store_dword"),
1491 (0x1d, 0x1d, 0x1d, "flat_store_dwordx2"),
1492 (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"),
1493 (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"),
1494 ( -1, 0x20, 0x20, "flat_load_ubyte_d16"),
1495 ( -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
1496 ( -1, 0x22, 0x22, "flat_load_sbyte_d16"),
1497 ( -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"),
1498 ( -1, 0x24, 0x24, "flat_load_short_d16"),
1499 ( -1, 0x25, 0x25, "flat_load_short_d16_hi"),
1500 (0x30, 0x40, 0x30, "flat_atomic_swap"),
1501 (0x31, 0x41, 0x31, "flat_atomic_cmpswap"),
1502 (0x32, 0x42, 0x32, "flat_atomic_add"),
1503 (0x33, 0x43, 0x33, "flat_atomic_sub"),
1504 (0x35, 0x44, 0x35, "flat_atomic_smin"),
1505 (0x36, 0x45, 0x36, "flat_atomic_umin"),
1506 (0x37, 0x46, 0x37, "flat_atomic_smax"),
1507 (0x38, 0x47, 0x38, "flat_atomic_umax"),
1508 (0x39, 0x48, 0x39, "flat_atomic_and"),
1509 (0x3a, 0x49, 0x3a, "flat_atomic_or"),
1510 (0x3b, 0x4a, 0x3b, "flat_atomic_xor"),
1511 (0x3c, 0x4b, 0x3c, "flat_atomic_inc"),
1512 (0x3d, 0x4c, 0x3d, "flat_atomic_dec"),
1513 (0x3e, -1, 0x3e, "flat_atomic_fcmpswap"),
1514 (0x3f, -1, 0x3f, "flat_atomic_fmin"),
1515 (0x40, -1, 0x40, "flat_atomic_fmax"),
1516 (0x50, 0x60, 0x50, "flat_atomic_swap_x2"),
1517 (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"),
1518 (0x52, 0x62, 0x52, "flat_atomic_add_x2"),
1519 (0x53, 0x63, 0x53, "flat_atomic_sub_x2"),
1520 (0x55, 0x64, 0x55, "flat_atomic_smin_x2"),
1521 (0x56, 0x65, 0x56, "flat_atomic_umin_x2"),
1522 (0x57, 0x66, 0x57, "flat_atomic_smax_x2"),
1523 (0x58, 0x67, 0x58, "flat_atomic_umax_x2"),
1524 (0x59, 0x68, 0x59, "flat_atomic_and_x2"),
1525 (0x5a, 0x69, 0x5a, "flat_atomic_or_x2"),
1526 (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"),
1527 (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"),
1528 (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"),
1529 (0x5e, -1, 0x5e, "flat_atomic_fcmpswap_x2"),
1530 (0x5f, -1, 0x5f, "flat_atomic_fmin_x2"),
1531 (0x60, -1, 0x60, "flat_atomic_fmax_x2"),
# FLAT rows are (gfx7, gfx8, gfx10, name). The column unpacked as gfx8 is
# passed in the argument slot where the DS/MUBUF/MTBUF loops pass gfx9.
# A row is flagged as atomic when its mnemonic contains "atomic".
for gfx7, gfx8, gfx10, name in FLAT:
    opcode(name, gfx7, gfx8, gfx10, Format.FLAT, is_atomic=("atomic" in name))
1538 (0x10, 0x08, "global_load_ubyte"),
1539 (0x11, 0x09, "global_load_sbyte"),
1540 (0x12, 0x0a, "global_load_ushort"),
1541 (0x13, 0x0b, "global_load_sshort"),
1542 (0x14, 0x0c, "global_load_dword"),
1543 (0x15, 0x0d, "global_load_dwordx2"),
1544 (0x16, 0x0f, "global_load_dwordx3"),
1545 (0x17, 0x0e, "global_load_dwordx4"),
1546 (0x18, 0x18, "global_store_byte"),
1547 (0x19, 0x19, "global_store_byte_d16_hi"),
1548 (0x1a, 0x1a, "global_store_short"),
1549 (0x1b, 0x1b, "global_store_short_d16_hi"),
1550 (0x1c, 0x1c, "global_store_dword"),
1551 (0x1d, 0x1d, "global_store_dwordx2"),
1552 (0x1e, 0x1f, "global_store_dwordx3"),
1553 (0x1f, 0x1e, "global_store_dwordx4"),
1554 (0x20, 0x20, "global_load_ubyte_d16"),
1555 (0x21, 0x21, "global_load_ubyte_d16_hi"),
1556 (0x22, 0x22, "global_load_sbyte_d16"),
1557 (0x23, 0x23, "global_load_sbyte_d16_hi"),
1558 (0x24, 0x24, "global_load_short_d16"),
1559 (0x25, 0x25, "global_load_short_d16_hi"),
1560 (0x40, 0x30, "global_atomic_swap"),
1561 (0x41, 0x31, "global_atomic_cmpswap"),
1562 (0x42, 0x32, "global_atomic_add"),
1563 (0x43, 0x33, "global_atomic_sub"),
1564 (0x44, 0x35, "global_atomic_smin"),
1565 (0x45, 0x36, "global_atomic_umin"),
1566 (0x46, 0x37, "global_atomic_smax"),
1567 (0x47, 0x38, "global_atomic_umax"),
1568 (0x48, 0x39, "global_atomic_and"),
1569 (0x49, 0x3a, "global_atomic_or"),
1570 (0x4a, 0x3b, "global_atomic_xor"),
1571 (0x4b, 0x3c, "global_atomic_inc"),
1572 (0x4c, 0x3d, "global_atomic_dec"),
1573 ( -1, 0x3e, "global_atomic_fcmpswap"),
1574 ( -1, 0x3f, "global_atomic_fmin"),
1575 ( -1, 0x40, "global_atomic_fmax"),
1576 (0x60, 0x50, "global_atomic_swap_x2"),
1577 (0x61, 0x51, "global_atomic_cmpswap_x2"),
1578 (0x62, 0x52, "global_atomic_add_x2"),
1579 (0x63, 0x53, "global_atomic_sub_x2"),
1580 (0x64, 0x55, "global_atomic_smin_x2"),
1581 (0x65, 0x56, "global_atomic_umin_x2"),
1582 (0x66, 0x57, "global_atomic_smax_x2"),
1583 (0x67, 0x58, "global_atomic_umax_x2"),
1584 (0x68, 0x59, "global_atomic_and_x2"),
1585 (0x69, 0x5a, "global_atomic_or_x2"),
1586 (0x6a, 0x5b, "global_atomic_xor_x2"),
1587 (0x6b, 0x5c, "global_atomic_inc_x2"),
1588 (0x6c, 0x5d, "global_atomic_dec_x2"),
1589 ( -1, 0x5e, "global_atomic_fcmpswap_x2"),
1590 ( -1, 0x5f, "global_atomic_fmin_x2"),
1591 ( -1, 0x60, "global_atomic_fmax_x2"),
# GLOBAL rows are (gfx8, gfx10, name). A literal -1 is passed in the slot
# where the other loops pass a gfx7 opcode (presumably "no encoding" --
# confirm against opcode()). A row is flagged as atomic when its mnemonic
# contains "atomic".
for gfx8, gfx10, name in GLOBAL:
    opcode(name, -1, gfx8, gfx10, Format.GLOBAL, is_atomic=("atomic" in name))
1598 (0x10, 0x08, "scratch_load_ubyte"),
1599 (0x11, 0x09, "scratch_load_sbyte"),
1600 (0x12, 0x0a, "scratch_load_ushort"),
1601 (0x13, 0x0b, "scratch_load_sshort"),
1602 (0x14, 0x0c, "scratch_load_dword"),
1603 (0x15, 0x0d, "scratch_load_dwordx2"),
1604 (0x16, 0x0f, "scratch_load_dwordx3"),
1605 (0x17, 0x0e, "scratch_load_dwordx4"),
1606 (0x18, 0x18, "scratch_store_byte"),
1607 (0x19, 0x19, "scratch_store_byte_d16_hi"),
1608 (0x1a, 0x1a, "scratch_store_short"),
1609 (0x1b, 0x1b, "scratch_store_short_d16_hi"),
1610 (0x1c, 0x1c, "scratch_store_dword"),
1611 (0x1d, 0x1d, "scratch_store_dwordx2"),
1612 (0x1e, 0x1f, "scratch_store_dwordx3"),
1613 (0x1f, 0x1e, "scratch_store_dwordx4"),
1614 (0x20, 0x20, "scratch_load_ubyte_d16"),
1615 (0x21, 0x21, "scratch_load_ubyte_d16_hi"),
1616 (0x22, 0x22, "scratch_load_sbyte_d16"),
1617 (0x23, 0x23, "scratch_load_sbyte_d16_hi"),
1618 (0x24, 0x24, "scratch_load_short_d16"),
1619 (0x25, 0x25, "scratch_load_short_d16_hi"),
# SCRATCH rows are (gfx8, gfx10, name). A literal -1 is passed in the slot
# where the other loops pass a gfx7 opcode (presumably "no encoding" --
# confirm against opcode()).
for gfx8, gfx10, name in SCRATCH:
    opcode(name, -1, gfx8, gfx10, Format.SCRATCH)
1624 # check for duplicate opcode numbers
#
# For each version suffix in ('gfx9', 'gfx10') this walks opcodes.values()
# and, per op:
#   * tests whether op.format is one of the four PSEUDO* formats,
#   * reads the per-version number via getattr(op, 'opcode_' + ver),
#   * builds key = (op.format, num) and looks it up in op_to_name,
#   * on a hit, collects the two clashing names, tests the
#     v_mul_lo_i32 / v_mul_lo_u32 special case, and prints a
#     "share the same opcode number" message,
#   * records op_to_name[key] = op.name.
#
# NOTE(review): the bodies of several conditionals and the initialisation of
# op_to_name are not visible in this excerpt -- presumably skip/abort
# statements; confirm against the full file before changing control flow.
# NOTE(review): ver only ever takes 'gfx9' or 'gfx10', so the 'gfx8' entry in
# the special-case test below can never match.
1625 for ver
in ['gfx9', 'gfx10']:
1627 for op
in opcodes
.values():
1628 if op
.format
in [Format
.PSEUDO
, Format
.PSEUDO_BRANCH
, Format
.PSEUDO_BARRIER
, Format
.PSEUDO_REDUCTION
]:
1631 num
= getattr(op
, 'opcode_' + ver
)
1635 key
= (op
.format
, num
)
1637 if key
in op_to_name
:
1639 names
= set([op_to_name
[key
], op
.name
])
1640 if ver
in ['gfx8', 'gfx9'] and names
== set(['v_mul_lo_i32', 'v_mul_lo_u32']):
1643 print('%s and %s share the same opcode number (%s)' % (op_to_name
[key
], op
.name
, ver
))
1646 op_to_name
[key
] = op
.name