2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "vc4_qpu_defines.h"
30 static const char *qpu_add_opcodes
[] = {
32 [QPU_A_FADD
] = "fadd",
33 [QPU_A_FSUB
] = "fsub",
34 [QPU_A_FMIN
] = "fmin",
35 [QPU_A_FMAX
] = "fmax",
36 [QPU_A_FMINABS
] = "fminabs",
37 [QPU_A_FMAXABS
] = "fmaxabs",
38 [QPU_A_FTOI
] = "ftoi",
39 [QPU_A_ITOF
] = "itof",
53 [QPU_A_V8ADDS
] = "v8adds",
54 [QPU_A_V8SUBS
] = "v8subs",
57 static const char *qpu_mul_opcodes
[] = {
59 [QPU_M_FMUL
] = "fmul",
60 [QPU_M_MUL24
] = "mul24",
61 [QPU_M_V8MULD
] = "v8muld",
62 [QPU_M_V8MIN
] = "v8min",
63 [QPU_M_V8MAX
] = "v8max",
64 [QPU_M_V8ADDS
] = "v8adds",
65 [QPU_M_V8SUBS
] = "v8subs",
68 static const char *qpu_sig
[] = {
69 [QPU_SIG_SW_BREAKPOINT
] = "sig_brk",
71 [QPU_SIG_THREAD_SWITCH
] = "sig_switch",
72 [QPU_SIG_PROG_END
] = "sig_end",
73 [QPU_SIG_WAIT_FOR_SCOREBOARD
] = "sig_wait_score",
74 [QPU_SIG_SCOREBOARD_UNLOCK
] = "sig_unlock_score",
75 [QPU_SIG_LAST_THREAD_SWITCH
] = "sig_thread_switch",
76 [QPU_SIG_COVERAGE_LOAD
] = "sig_coverage_load",
77 [QPU_SIG_COLOR_LOAD
] = "sig_color_load",
78 [QPU_SIG_COLOR_LOAD_END
] = "sig_color_load_end",
79 [QPU_SIG_LOAD_TMU0
] = "load_tmu0",
80 [QPU_SIG_LOAD_TMU1
] = "load_tmu1",
81 [QPU_SIG_ALPHA_MASK_LOAD
] = "sig_alpha_mask_load",
82 [QPU_SIG_SMALL_IMM
] = "sig_small_imm",
83 [QPU_SIG_LOAD_IMM
] = "sig_load_imm",
84 [QPU_SIG_BRANCH
] = "sig_branch",
87 static const char *qpu_pack_mul
[] = {
88 [QPU_PACK_MUL_NOP
] = "",
89 [QPU_PACK_MUL_8888
] = "8888",
90 [QPU_PACK_MUL_8A
] = "8a",
91 [QPU_PACK_MUL_8B
] = "8b",
92 [QPU_PACK_MUL_8C
] = "8c",
93 [QPU_PACK_MUL_8D
] = "8d",
96 /* The QPU unpack for A and R4 files can be described the same, it's just that
97 * the R4 variants are convert-to-float only, with no int support.
99 static const char *qpu_unpack
[] = {
100 [QPU_UNPACK_NOP
] = "",
101 [QPU_UNPACK_16A
] = "16a",
102 [QPU_UNPACK_16B
] = "16b",
103 [QPU_UNPACK_8D_REP
] = "8d_rep",
104 [QPU_UNPACK_8A
] = "8a",
105 [QPU_UNPACK_8B
] = "8b",
106 [QPU_UNPACK_8C
] = "8c",
107 [QPU_UNPACK_8D
] = "8d",
/*
 * Name table that print_alu_src() indexes with (raddr - 32) through DESC();
 * presumably the special read addresses of register file A.
 * NOTE(review): none of the initializer entries are visible in this view.
 */
110 static const char *special_read_a
[] = {
/*
 * Name table that print_alu_src() indexes with (raddr - 32) through DESC();
 * presumably the special read addresses of register file B.
 * NOTE(review): none of the initializer entries are visible in this view.
 */
133 static const char *special_read_b
[] = {
157 * This has the B-file descriptions for register writes.
159 * Since only a couple of regs are different between A and B, the A overrides
160 * are in get_special_write_desc().
162 static const char *special_write
[] = {
167 [QPU_W_TMU_NOSWAP
] = "tmu_noswap",
169 [QPU_W_HOST_INT
] = "host_int",
171 [QPU_W_UNIFORMS_ADDRESS
] = "uniforms_addr",
172 [QPU_W_QUAD_XY
] = "quad_y",
173 [QPU_W_MS_FLAGS
] = "ms_flags",
174 [QPU_W_TLB_STENCIL_SETUP
] = "tlb_stencil_setup",
175 [QPU_W_TLB_Z
] = "tlb_z",
176 [QPU_W_TLB_COLOR_MS
] = "tlb_color_ms",
177 [QPU_W_TLB_COLOR_ALL
] = "tlb_color_all",
179 [QPU_W_VPMVCD_SETUP
] = "vw_setup",
180 [QPU_W_VPM_ADDR
] = "vw_addr",
181 [QPU_W_MUTEX_RELEASE
] = "mutex_release",
182 [QPU_W_SFU_RECIP
] = "sfu_recip",
183 [QPU_W_SFU_RECIPSQRT
] = "sfu_recipsqrt",
184 [QPU_W_SFU_EXP
] = "sfu_exp",
185 [QPU_W_SFU_LOG
] = "sfu_log",
186 [QPU_W_TMU0_S
] = "tmu0_s",
187 [QPU_W_TMU0_T
] = "tmu0_t",
188 [QPU_W_TMU0_R
] = "tmu0_r",
189 [QPU_W_TMU0_B
] = "tmu0_b",
190 [QPU_W_TMU1_S
] = "tmu1_s",
191 [QPU_W_TMU1_T
] = "tmu1_t",
192 [QPU_W_TMU1_R
] = "tmu1_r",
193 [QPU_W_TMU1_B
] = "tmu1_b",
196 static const char *qpu_pack_a
[] = {
197 [QPU_PACK_A_NOP
] = "",
198 [QPU_PACK_A_16A
] = ".16a",
199 [QPU_PACK_A_16B
] = ".16b",
200 [QPU_PACK_A_8888
] = ".8888",
201 [QPU_PACK_A_8A
] = ".8a",
202 [QPU_PACK_A_8B
] = ".8b",
203 [QPU_PACK_A_8C
] = ".8c",
204 [QPU_PACK_A_8D
] = ".8d",
206 [QPU_PACK_A_32_SAT
] = ".sat",
207 [QPU_PACK_A_16A_SAT
] = ".16a.sat",
208 [QPU_PACK_A_16B_SAT
] = ".16b.sat",
209 [QPU_PACK_A_8888_SAT
] = ".8888.sat",
210 [QPU_PACK_A_8A_SAT
] = ".8a.sat",
211 [QPU_PACK_A_8B_SAT
] = ".8b.sat",
212 [QPU_PACK_A_8C_SAT
] = ".8c.sat",
213 [QPU_PACK_A_8D_SAT
] = ".8d.sat",
216 static const char *qpu_condflags
[] = {
217 [QPU_COND_NEVER
] = ".never",
218 [QPU_COND_ALWAYS
] = "",
219 [QPU_COND_ZS
] = ".zs",
220 [QPU_COND_ZC
] = ".zc",
221 [QPU_COND_NS
] = ".ns",
222 [QPU_COND_NC
] = ".nc",
223 [QPU_COND_CS
] = ".cs",
224 [QPU_COND_CC
] = ".cc",
/*
 * Look up the printable name for `index` in the string table `array`.
 *
 * Evaluates to "???" when `index` is past the end of the table or when the
 * slot holds NULL (no designated initializer for that value); otherwise
 * evaluates to the table entry.
 *
 * `array` must be a real array, not a pointer, because ARRAY_SIZE() is
 * applied to it.  `index` is parenthesized so that arguments built from
 * low-precedence operators (for example `c ? 2 : 0`) are compared as a
 * whole.  Both arguments are expanded more than once, so they must not have
 * side effects.
 */
#define DESC(array, index)                                        \
        (((index) >= ARRAY_SIZE(array) || !(array)[(index)]) ?    \
         "???" : (array)[(index)])
/*
 * Return the printable name for special write address `reg`.
 *
 * The fall-through path returns special_write[reg], which holds the B-file
 * names; per the comment on special_write, the A-file overrides are handled
 * in this function.  The result can be NULL for a slot with no entry
 * (print_alu_dst() tests the return value before printing it).
 *
 * NOTE(review): only one case label and the final return are visible in
 * this view; the use of `is_a`, the remaining cases and the strings they
 * return are not shown.  No range check on `reg` is visible either --
 * confirm callers only pass values inside special_write.
 */
232 get_special_write_desc(int reg
, bool is_a
)
238 case QPU_W_VPMVCD_SETUP
:
245 return special_write
[reg
];
/*
 * Write '.' followed by the name of MUL pack mode `pack` to `out`.
 *
 * The name comes from qpu_pack_mul via DESC(), so a value with no entry is
 * written as ".???".  QPU_PACK_MUL_NOP maps to the empty string, so it is
 * written as a lone ".".
 */
249 vc4_qpu_disasm_pack_mul(FILE *out
, uint32_t pack
)
251 fprintf(out
, ".%s", DESC(qpu_pack_mul
, pack
));
/*
 * Write the name of regfile-A pack mode `pack` to `out`.
 *
 * No separator is added here: the qpu_pack_a strings already begin with
 * '.', and QPU_PACK_A_NOP is the empty string, so nothing is written for
 * it.  A value with no entry is written as "???" (DESC() fallback).
 */
255 vc4_qpu_disasm_pack_a(FILE *out
, uint32_t pack
)
257 fprintf(out
, "%s", DESC(qpu_pack_a
, pack
));
/*
 * Write '.' followed by the name of unpack mode `unpack` to `out`.
 *
 * Nothing is written for QPU_UNPACK_NOP.  A value with no entry in
 * qpu_unpack is written as ".???" (DESC() fallback).
 */
261 vc4_qpu_disasm_unpack(FILE *out
, uint32_t unpack
)
263 if (unpack
!= QPU_UNPACK_NOP
)
264 fprintf(out
, ".%s", DESC(qpu_unpack
, unpack
));
/*
 * Print the destination operand of one ALU half of `inst` to stderr.
 *
 * is_mul selects which half: true reads the QPU_WADDR_MUL field, false
 * reads QPU_WADDR_ADD.  The operand is printed as a plain register
 * ("r<file><n>"), as a named special register, or as "<file><n>?" when
 * get_special_write_desc() has no name for it, followed by an optional
 * pack suffix.
 *
 * NOTE(review): some lines of this function are not visible in this view,
 * including the test that selects the plain-register case.
 */
268 print_alu_dst(uint64_t inst
, bool is_mul
)
/*
 * With QPU_WS clear the add result targets file A and the mul result
 * targets file B; with QPU_WS set the two are swapped.
 */
270 bool is_a
= is_mul
== ((inst
& QPU_WS
) != 0);
271 uint32_t waddr
= (is_mul
?
272 QPU_GET_FIELD(inst
, QPU_WADDR_MUL
) :
273 QPU_GET_FIELD(inst
, QPU_WADDR_ADD
));
274 const char *file
= is_a
? "a" : "b";
275 uint32_t pack
= QPU_GET_FIELD(inst
, QPU_PACK
);
/* NOTE(review): waddr is uint32_t but is printed with %d below. */
278 fprintf(stderr
, "r%s%d", file
, waddr
);
279 else if (get_special_write_desc(waddr
, is_a
))
280 fprintf(stderr
, "%s", get_special_write_desc(waddr
, is_a
));
/* No name is known for this address: print file letter, number and '?'. */
282 fprintf(stderr
, "%s%d?", file
, waddr
);
/*
 * Pack suffix: with QPU_PM set the pack field applies to the mul result
 * (MUL pack names); with QPU_PM clear it applies to a file-A destination
 * (regfile-A pack names).  Any other combination prints no suffix.
 */
284 if (is_mul
&& (inst
& QPU_PM
)) {
285 vc4_qpu_disasm_pack_mul(stderr
, pack
);
286 } else if (is_a
&& !(inst
& QPU_PM
)) {
287 vc4_qpu_disasm_pack_a(stderr
, pack
);
/*
 * Print one source operand of `inst` to stderr.
 *
 * `mux` is the operand's input-mux selector.  Values up to QPU_MUX_R5 are
 * printed as "r<mux>".  Otherwise the operand comes from a register file:
 * any mux other than QPU_MUX_B reads QPU_RADDR_A, QPU_MUX_B reads
 * QPU_RADDR_B.  Read addresses up to 31 print as "r<file><n>"; higher ones
 * are looked up in special_read_a / special_read_b at (raddr - 32).  When
 * the instruction signal is QPU_SIG_SMALL_IMM a small immediate is decoded
 * and printed instead.  An unpack suffix follows where applicable.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view, including the first half of the small-immediate condition, the
 * range tests between the immediate encodings, and the test that chooses
 * between special_read_a and special_read_b.
 */
292 print_alu_src(uint64_t inst
, uint32_t mux
)
294 bool is_a
= mux
!= QPU_MUX_B
;
295 const char *file
= is_a
? "a" : "b";
296 uint32_t raddr
= (is_a
?
297 QPU_GET_FIELD(inst
, QPU_RADDR_A
) :
298 QPU_GET_FIELD(inst
, QPU_RADDR_B
));
299 uint32_t unpack
= QPU_GET_FIELD(inst
, QPU_UNPACK
);
301 if (mux
<= QPU_MUX_R5
)
302 fprintf(stderr
, "r%d", mux
);
304 QPU_GET_FIELD(inst
, QPU_SIG
) == QPU_SIG_SMALL_IMM
) {
305 uint32_t si
= QPU_GET_FIELD(inst
, QPU_SMALL_IMM
);
/*
 * Small-immediate encodings printed below, in order: si itself; si - 32
 * (written as -16 + (si - 16)); 2^(si - 32) as a float; 1 / 2^(48 - si) as
 * a float; anything else is reported as a bad immediate.
 * NOTE(review): si is uint32_t but is printed with %d.
 */
307 fprintf(stderr
, "%d", si
);
309 fprintf(stderr
, "%d", -16 + (si
- 16));
311 fprintf(stderr
, "%.1f", (float)(1 << (si
- 32)));
313 fprintf(stderr
, "%f", 1.0f
/ (1 << (48 - si
)));
315 fprintf(stderr
, "<bad imm %d>", si
);
316 } else if (raddr
<= 31)
317 fprintf(stderr
, "r%s%d", file
, raddr
);
/* Special read addresses start at 32, hence the (raddr - 32) index. */
320 fprintf(stderr
, "%s", DESC(special_read_a
, raddr
- 32));
322 fprintf(stderr
, "%s", DESC(special_read_b
, raddr
- 32));
/*
 * The unpack suffix is printed for a file-A operand when QPU_PM is clear,
 * and for an R4 operand when QPU_PM is set.
 */
325 if (((mux
== QPU_MUX_A
&& !(inst
& QPU_PM
)) ||
326 (mux
== QPU_MUX_R4
&& (inst
& QPU_PM
)))) {
327 vc4_qpu_disasm_unpack(stderr
, unpack
);
/*
 * Print the add-ALU half of `inst` to stderr: mnemonic, optional ".sf" and
 * condition suffixes, destination, then the source operands selected by
 * the QPU_ADD_A and QPU_ADD_B mux fields.
 *
 * NOTE(review): some lines of this function are not visible in this view;
 * in particular, any guard around the second source operand is not shown.
 */
332 print_add_op(uint64_t inst
)
334 uint32_t op_add
= QPU_GET_FIELD(inst
, QPU_OP_ADD
);
335 uint32_t cond
= QPU_GET_FIELD(inst
, QPU_COND_ADD
);
/* An OR whose two source muxes are identical is displayed as "mov". */
336 bool is_mov
= (op_add
== QPU_A_OR
&&
337 QPU_GET_FIELD(inst
, QPU_ADD_A
) ==
338 QPU_GET_FIELD(inst
, QPU_ADD_B
));
/*
 * ".sf" and the condition suffix are both suppressed when the add op is
 * QPU_A_NOP.
 */
340 fprintf(stderr
, "%s%s%s ",
341 is_mov
? "mov" : DESC(qpu_add_opcodes
, op_add
),
342 ((inst
& QPU_SF
) && op_add
!= QPU_A_NOP
) ? ".sf" : "",
343 op_add
!= QPU_A_NOP
? DESC(qpu_condflags
, cond
) : "");
345 print_alu_dst(inst
, false);
346 fprintf(stderr
, ", ");
348 print_alu_src(inst
, QPU_GET_FIELD(inst
, QPU_ADD_A
));
351 fprintf(stderr
, ", ");
353 print_alu_src(inst
, QPU_GET_FIELD(inst
, QPU_ADD_B
));
/*
 * Print the mul-ALU half of `inst` to stderr: mnemonic, optional ".sf" and
 * condition suffixes, destination, then the source operands selected by
 * the QPU_MUL_A and QPU_MUL_B mux fields.
 *
 * NOTE(review): some lines of this function are not visible in this view;
 * in particular, any guard around the second source operand is not shown.
 */
358 print_mul_op(uint64_t inst
)
360 uint32_t op_add
= QPU_GET_FIELD(inst
, QPU_OP_ADD
);
361 uint32_t op_mul
= QPU_GET_FIELD(inst
, QPU_OP_MUL
);
362 uint32_t cond
= QPU_GET_FIELD(inst
, QPU_COND_MUL
);
/* A V8MIN whose two source muxes are identical is displayed as "mov". */
363 bool is_mov
= (op_mul
== QPU_M_V8MIN
&&
364 QPU_GET_FIELD(inst
, QPU_MUL_A
) ==
365 QPU_GET_FIELD(inst
, QPU_MUL_B
));
/*
 * ".sf" is shown on the mul op only when the add op is QPU_A_NOP;
 * print_add_op() shows it in the opposite case.  The condition suffix is
 * suppressed when the mul op itself is QPU_M_NOP.
 */
367 fprintf(stderr
, "%s%s%s ",
368 is_mov
? "mov" : DESC(qpu_mul_opcodes
, op_mul
),
369 ((inst
& QPU_SF
) && op_add
== QPU_A_NOP
) ? ".sf" : "",
370 op_mul
!= QPU_M_NOP
? DESC(qpu_condflags
, cond
) : "");
372 print_alu_dst(inst
, true);
373 fprintf(stderr
, ", ");
375 print_alu_src(inst
, QPU_GET_FIELD(inst
, QPU_MUL_A
));
378 fprintf(stderr
, ", ");
379 print_alu_src(inst
, QPU_GET_FIELD(inst
, QPU_MUL_B
));
/*
 * Print a load-immediate instruction to stderr in the form
 * "load_imm <add dst><cond>, <mul dst><cond>, 0x<imm> (<float>)".
 *
 * Each condition suffix is omitted when the corresponding write address is
 * QPU_W_NOP.
 *
 * NOTE(review): the declaration of `imm` is not visible in this view, and
 * uif() is defined elsewhere -- presumably it reinterprets the 32-bit
 * immediate as a float; confirm.
 */
384 print_load_imm(uint64_t inst
)
387 uint32_t waddr_add
= QPU_GET_FIELD(inst
, QPU_WADDR_ADD
);
388 uint32_t waddr_mul
= QPU_GET_FIELD(inst
, QPU_WADDR_MUL
);
389 uint32_t cond_add
= QPU_GET_FIELD(inst
, QPU_COND_ADD
);
390 uint32_t cond_mul
= QPU_GET_FIELD(inst
, QPU_COND_MUL
);
392 fprintf(stderr
, "load_imm ");
/* Add-side destination and its condition. */
393 print_alu_dst(inst
, false);
394 fprintf(stderr
, "%s, ", (waddr_add
!= QPU_W_NOP
?
395 DESC(qpu_condflags
, cond_add
) : ""));
/* Mul-side destination and its condition. */
396 print_alu_dst(inst
, true);
397 fprintf(stderr
, "%s, ", (waddr_mul
!= QPU_W_NOP
?
398 DESC(qpu_condflags
, cond_mul
) : ""));
/* The immediate is shown both as raw hex and as a floating-point value. */
399 fprintf(stderr
, "0x%08x (%f)", imm
, uif(imm
));
/*
 * Disassemble `num_instructions` 64-bit QPU instructions from
 * `instructions` and print them to stderr.
 *
 * Each instruction is dispatched on its signal field: a branch prints just
 * "branch", QPU_SIG_LOAD_IMM goes to print_load_imm(), and the remaining
 * path prints the signal name (unless it is QPU_SIG_NONE) and a " ; "
 * separator.
 *
 * A newline is printed after each instruction unless exactly one
 * instruction was requested.
 *
 * NOTE(review): the switch header, the other case labels and the calls
 * around the " ; " separator are not visible in this view.
 */
403 vc4_qpu_disasm(const uint64_t *instructions
, int num_instructions
)
405 for (int i
= 0; i
< num_instructions
; i
++) {
406 uint64_t inst
= instructions
[i
];
407 uint32_t sig
= QPU_GET_FIELD(inst
, QPU_SIG
);
411 fprintf(stderr
, "branch");
413 case QPU_SIG_LOAD_IMM
:
414 print_load_imm(inst
);
417 if (sig
!= QPU_SIG_NONE
)
418 fprintf(stderr
, "%s ", DESC(qpu_sig
, sig
));
420 fprintf(stderr
, " ; ");
/* A single-instruction dump stays on the caller's current line. */
425 if (num_instructions
!= 1)
426 fprintf(stderr
, "\n");