2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 #include <util/u_debug.h>
34 #include "instr-a3xx.h"
37 static enum debug_t debug
;
39 #define printf debug_printf
41 static const char *levels
[] = {
60 static const char *component
= "xyzw";
62 static const char *type
[] = {
78 struct shader_stats
*stats
;
80 /* we have to process the dst register after src to avoid tripping up
81 * the read-before-write detection
87 /* current instruction repeat flag: */
89 /* current instruction repeat indx/offset (for --expand): */
92 /* tracking for register usage */
95 regmask_t rbw
; /* read before write */
96 regmask_t war
; /* write after read */
101 static const char *float_imms
[] = {
116 static void print_reg(struct disasm_ctx
*ctx
, reg_t reg
, bool full
,
117 bool is_float
, bool r
,
118 bool c
, bool im
, bool neg
, bool abs
, bool addr_rel
)
120 const char type
= c
? 'c' : 'r';
122 // XXX I prefer - and || for neg/abs, but preserving format used
123 // by libllvm-a3xx for easy diffing..
126 fprintf(ctx
->out
, "(absneg)");
128 fprintf(ctx
->out
, "(neg)");
130 fprintf(ctx
->out
, "(abs)");
133 fprintf(ctx
->out
, "(r)");
136 if (is_float
&& full
&& reg
.iim_val
< ARRAY_SIZE(float_imms
)) {
137 fprintf(ctx
->out
, "(%s)", float_imms
[reg
.iim_val
]);
139 fprintf(ctx
->out
, "%d", reg
.iim_val
);
141 } else if (addr_rel
) {
142 /* I would just use %+d but trying to make it diff'able with
146 fprintf(ctx
->out
, "%s%c<a0.x - %d>", full
? "" : "h", type
, -reg
.iim_val
);
147 else if (reg
.iim_val
> 0)
148 fprintf(ctx
->out
, "%s%c<a0.x + %d>", full
? "" : "h", type
, reg
.iim_val
);
150 fprintf(ctx
->out
, "%s%c<a0.x>", full
? "" : "h", type
);
151 } else if ((reg
.num
== REG_A0
) && !c
) {
152 /* This matches libllvm output, the second (scalar) address register
153 * seems to be called a1.x instead of a0.y.
155 fprintf(ctx
->out
, "a%d.x", reg
.comp
);
156 } else if ((reg
.num
== REG_P0
) && !c
) {
157 fprintf(ctx
->out
, "p0.%c", component
[reg
.comp
]);
159 fprintf(ctx
->out
, "%s%c%d.%c", full
? "" : "h", type
, reg
.num
, component
[reg
.comp
]);
160 if (0 && full
&& !c
) {
165 fprintf(ctx
->out
, " (hr%d.%c,hr%d.%c)", hr0
.num
, component
[hr0
.comp
], hr1
.num
, component
[hr1
.comp
]);
170 static void regmask_set(regmask_t
*regmask
, unsigned num
, bool full
)
172 ir3_assert(num
< MAX_REG
);
173 __regmask_set(regmask
, !full
, num
);
176 static void regmask_clear(regmask_t
*regmask
, unsigned num
, bool full
)
178 ir3_assert(num
< MAX_REG
);
179 __regmask_clear(regmask
, !full
, num
);
182 static unsigned regmask_get(regmask_t
*regmask
, unsigned num
, bool full
)
184 ir3_assert(num
< MAX_REG
);
185 return __regmask_get(regmask
, !full
, num
);
188 static unsigned regidx(reg_t reg
)
190 return (4 * reg
.num
) + reg
.comp
;
193 static reg_t
idxreg(unsigned idx
)
201 static void print_sequence(struct disasm_ctx
*ctx
, int first
, int last
)
203 if (first
!= MAX_REG
) {
205 fprintf(ctx
->out
, " %d", first
);
207 fprintf(ctx
->out
, " %d-%d", first
, last
);
212 static int print_regs(struct disasm_ctx
*ctx
, regmask_t
*regmask
, bool full
)
214 int num
, max
= 0, cnt
= 0;
217 first
= last
= MAX_REG
;
219 for (num
= 0; num
< MAX_REG
; num
++) {
220 if (regmask_get(regmask
, num
, full
)) {
221 if (num
!= (last
+ 1)) {
222 print_sequence(ctx
, first
, last
);
232 print_sequence(ctx
, first
, last
);
234 fprintf(ctx
->out
, " (cnt=%d, max=%d)", cnt
, max
);
239 static void print_reg_stats(struct disasm_ctx
*ctx
)
241 int fullreg
, halfreg
;
243 fprintf(ctx
->out
, "%sRegister Stats:\n", levels
[ctx
->level
]);
244 fprintf(ctx
->out
, "%s- used (half):", levels
[ctx
->level
]);
245 halfreg
= print_regs(ctx
, &ctx
->regs
.used
, false);
246 fprintf(ctx
->out
, "\n");
247 fprintf(ctx
->out
, "%s- used (full):", levels
[ctx
->level
]);
248 fullreg
= print_regs(ctx
, &ctx
->regs
.used
, true);
249 fprintf(ctx
->out
, "\n");
250 fprintf(ctx
->out
, "%s- input (half):", levels
[ctx
->level
]);
251 print_regs(ctx
, &ctx
->regs
.rbw
, false);
252 fprintf(ctx
->out
, "\n");
253 fprintf(ctx
->out
, "%s- input (full):", levels
[ctx
->level
]);
254 print_regs(ctx
, &ctx
->regs
.rbw
, true);
255 fprintf(ctx
->out
, "\n");
256 fprintf(ctx
->out
, "%s- max const: %u\n", levels
[ctx
->level
], ctx
->regs
.max_const
);
257 fprintf(ctx
->out
, "\n");
258 fprintf(ctx
->out
, "%s- output (half):", levels
[ctx
->level
]);
259 print_regs(ctx
, &ctx
->regs
.war
, false);
260 fprintf(ctx
->out
, " (estimated)\n");
261 fprintf(ctx
->out
, "%s- output (full):", levels
[ctx
->level
]);
262 print_regs(ctx
, &ctx
->regs
.war
, true);
263 fprintf(ctx
->out
, " (estimated)\n");
265 /* convert to vec4, which is the granularity that registers are
266 * assigned to shader:
268 fullreg
= (fullreg
+ 3) / 4;
269 halfreg
= ctx
->regs
.used
.mergedregs
? 0 : (halfreg
+ 3) / 4;
271 // Note this count of instructions includes rptN, which matches
272 // up to how mesa prints this:
273 fprintf(ctx
->out
, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
274 "(%d instlen), %u last-baryf, %d half, %d full\n",
275 levels
[ctx
->level
], ctx
->stats
->instructions
, ctx
->stats
->nops
,
276 ctx
->stats
->instructions
- ctx
->stats
->nops
, ctx
->stats
->instlen
,
277 ctx
->stats
->last_baryf
, halfreg
, fullreg
);
278 fprintf(ctx
->out
, "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, "
279 "%u cat4, %u cat5, %u cat6, %u cat7\n",
281 ctx
->stats
->instrs_per_cat
[0],
282 ctx
->stats
->instrs_per_cat
[1],
283 ctx
->stats
->instrs_per_cat
[2],
284 ctx
->stats
->instrs_per_cat
[3],
285 ctx
->stats
->instrs_per_cat
[4],
286 ctx
->stats
->instrs_per_cat
[5],
287 ctx
->stats
->instrs_per_cat
[6],
288 ctx
->stats
->instrs_per_cat
[7]);
289 fprintf(ctx
->out
, "%s- shaderdb: %d (ss), %d (sy)\n", levels
[ctx
->level
],
290 ctx
->stats
->ss
, ctx
->stats
->sy
);
293 static void process_reg_dst(struct disasm_ctx
*ctx
)
295 if (!ctx
->last_dst_valid
)
298 /* ignore dummy writes (ie. r63.x): */
299 if (!VALIDREG(ctx
->last_dst
))
302 for (unsigned i
= 0; i
<= ctx
->repeat
; i
++) {
303 unsigned dst
= ctx
->last_dst
+ i
;
305 regmask_set(&ctx
->regs
.war
, dst
, ctx
->last_dst_full
);
306 regmask_set(&ctx
->regs
.used
, dst
, ctx
->last_dst_full
);
309 ctx
->last_dst_valid
= false;
311 static void print_reg_dst(struct disasm_ctx
*ctx
, reg_t reg
, bool full
, bool addr_rel
)
313 /* presumably the special registers a0.c and p0.c don't count.. */
314 if (!(addr_rel
|| (reg
.num
== REG_A0
) || (reg
.num
== REG_P0
))) {
315 ctx
->last_dst
= regidx(reg
);
316 ctx
->last_dst_full
= full
;
317 ctx
->last_dst_valid
= true;
319 reg
= idxreg(regidx(reg
) + ctx
->repeatidx
);
320 print_reg(ctx
, reg
, full
, false, false, false, false, false, false, addr_rel
);
323 /* TODO switch to using reginfo struct everywhere, since more readable
324 * than passing a bunch of bools to print_reg_src
332 bool f
; /* src reg is interpreted as float, used for printing immediates */
339 static void print_src(struct disasm_ctx
*ctx
, struct reginfo
*info
)
341 reg_t reg
= info
->reg
;
343 /* presumably the special registers a0.c and p0.c don't count.. */
344 if (!(info
->addr_rel
|| info
->c
|| info
->im
||
345 (reg
.num
== REG_A0
) || (reg
.num
== REG_P0
))) {
346 int i
, num
= regidx(reg
);
347 for (i
= 0; i
<= ctx
->repeat
; i
++) {
348 unsigned src
= num
+ i
;
350 if (!regmask_get(&ctx
->regs
.used
, src
, info
->full
))
351 regmask_set(&ctx
->regs
.rbw
, src
, info
->full
);
353 regmask_clear(&ctx
->regs
.war
, src
, info
->full
);
354 regmask_set(&ctx
->regs
.used
, src
, info
->full
);
359 } else if (info
->c
) {
360 int i
, num
= regidx(reg
);
361 for (i
= 0; i
<= ctx
->repeat
; i
++) {
362 unsigned src
= num
+ i
;
364 ctx
->regs
.max_const
= MAX2(ctx
->regs
.max_const
, src
);
370 unsigned max
= (num
+ ctx
->repeat
+ 1 + 3) / 4;
371 if (max
> ctx
->stats
->constlen
)
372 ctx
->stats
->constlen
= max
;
376 reg
= idxreg(regidx(info
->reg
) + ctx
->repeatidx
);
378 print_reg(ctx
, reg
, info
->full
, info
->f
, info
->r
, info
->c
, info
->im
,
379 info
->neg
, info
->abs
, info
->addr_rel
);
382 //static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
384 // print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
387 static void print_instr_cat0(struct disasm_ctx
*ctx
, instr_t
*instr
)
389 static const struct {
394 [BRANCH_PLAIN
] = { "r", 1, false },
395 [BRANCH_OR
] = { "rao", 2, false },
396 [BRANCH_AND
] = { "raa", 2, false },
397 [BRANCH_CONST
] = { "rac", 0, true },
398 [BRANCH_ANY
] = { "any", 1, false },
399 [BRANCH_ALL
] = { "all", 1, false },
400 [BRANCH_X
] = { "rax", 0, false },
402 instr_cat0_t
*cat0
= &instr
->cat0
;
404 switch (instr_opc(instr
, ctx
->gpu_id
)) {
408 fprintf(ctx
->out
, " %sp0.%c", cat0
->inv0
? "!" : "",
409 component
[cat0
->comp0
]);
412 fprintf(ctx
->out
, "%s", brinfo
[cat0
->brtype
].suffix
);
413 if (brinfo
[cat0
->brtype
].idx
) {
414 fprintf(ctx
->out
, ".%u", cat0
->idx
);
416 if (brinfo
[cat0
->brtype
].nsrc
>= 1) {
417 fprintf(ctx
->out
, " %sp0.%c,", cat0
->inv0
? "!" : "",
418 component
[cat0
->comp0
]);
420 if (brinfo
[cat0
->brtype
].nsrc
>= 2) {
421 fprintf(ctx
->out
, " %sp0.%c,", cat0
->inv1
? "!" : "",
422 component
[cat0
->comp1
]);
424 fprintf(ctx
->out
, " #%d", cat0
->a3xx
.immed
);
431 fprintf(ctx
->out
, " #%d", cat0
->a3xx
.immed
);
435 if ((debug
& PRINT_VERBOSE
) && (cat0
->dummy3
|cat0
->dummy4
))
436 fprintf(ctx
->out
, "\t{0: %x,%x}", cat0
->dummy3
, cat0
->dummy4
);
439 static void print_instr_cat1(struct disasm_ctx
*ctx
, instr_t
*instr
)
441 instr_cat1_t
*cat1
= &instr
->cat1
;
444 fprintf(ctx
->out
, "(ul)");
446 if (cat1
->src_type
== cat1
->dst_type
) {
447 if ((cat1
->src_type
== TYPE_S16
) && (((reg_t
)cat1
->dst
).num
== REG_A0
)) {
448 /* special case (mnemonic?): */
449 fprintf(ctx
->out
, "mova");
451 fprintf(ctx
->out
, "mov.%s%s", type
[cat1
->src_type
], type
[cat1
->dst_type
]);
454 fprintf(ctx
->out
, "cov.%s%s", type
[cat1
->src_type
], type
[cat1
->dst_type
]);
457 fprintf(ctx
->out
, " ");
460 fprintf(ctx
->out
, "(even)");
463 fprintf(ctx
->out
, "(pos_infinity)");
465 print_reg_dst(ctx
, (reg_t
)(cat1
->dst
), type_size(cat1
->dst_type
) == 32,
468 fprintf(ctx
->out
, ", ");
470 /* ugg, have to special case this.. vs print_reg().. */
472 if (type_float(cat1
->src_type
))
473 fprintf(ctx
->out
, "(%f)", cat1
->fim_val
);
474 else if (type_uint(cat1
->src_type
))
475 fprintf(ctx
->out
, "0x%08x", cat1
->uim_val
);
477 fprintf(ctx
->out
, "%d", cat1
->iim_val
);
478 } else if (cat1
->src_rel
&& !cat1
->src_c
) {
479 /* I would just use %+d but trying to make it diff'able with
482 char type
= cat1
->src_rel_c
? 'c' : 'r';
483 const char *full
= (type_size(cat1
->src_type
) == 32) ? "" : "h";
485 fprintf(ctx
->out
, "%s%c<a0.x - %d>", full
, type
, -cat1
->off
);
486 else if (cat1
->off
> 0)
487 fprintf(ctx
->out
, "%s%c<a0.x + %d>", full
, type
, cat1
->off
);
489 fprintf(ctx
->out
, "%s%c<a0.x>", full
, type
);
491 struct reginfo src
= {
492 .reg
= (reg_t
)cat1
->src
,
493 .full
= type_size(cat1
->src_type
) == 32,
498 print_src(ctx
, &src
);
501 if ((debug
& PRINT_VERBOSE
) && (cat1
->must_be_0
))
502 fprintf(ctx
->out
, "\t{1: %x}", cat1
->must_be_0
);
505 static void print_instr_cat2(struct disasm_ctx
*ctx
, instr_t
*instr
)
507 instr_cat2_t
*cat2
= &instr
->cat2
;
508 int opc
= _OPC(2, cat2
->opc
);
509 static const char *cond
[] = {
526 fprintf(ctx
->out
, ".%s", cond
[cat2
->cond
]);
530 fprintf(ctx
->out
, " ");
532 fprintf(ctx
->out
, "(ei)");
533 print_reg_dst(ctx
, (reg_t
)(cat2
->dst
), cat2
->full
^ cat2
->dst_half
, false);
534 fprintf(ctx
->out
, ", ");
536 struct reginfo src1
= {
538 .r
= cat2
->repeat
? cat2
->src1_r
: 0,
539 .f
= is_cat2_float(opc
),
541 .abs
= cat2
->src1_abs
,
542 .neg
= cat2
->src1_neg
,
545 if (cat2
->c1
.src1_c
) {
546 src1
.reg
= (reg_t
)(cat2
->c1
.src1
);
548 } else if (cat2
->rel1
.src1_rel
) {
549 src1
.reg
= (reg_t
)(cat2
->rel1
.src1
);
550 src1
.c
= cat2
->rel1
.src1_c
;
551 src1
.addr_rel
= true;
553 src1
.reg
= (reg_t
)(cat2
->src1
);
555 print_src(ctx
, &src1
);
557 struct reginfo src2
= {
558 .r
= cat2
->repeat
? cat2
->src2_r
: 0,
560 .f
= is_cat2_float(opc
),
561 .abs
= cat2
->src2_abs
,
562 .neg
= cat2
->src2_neg
,
580 /* these only have one src reg */
583 fprintf(ctx
->out
, ", ");
584 if (cat2
->c2
.src2_c
) {
585 src2
.reg
= (reg_t
)(cat2
->c2
.src2
);
587 } else if (cat2
->rel2
.src2_rel
) {
588 src2
.reg
= (reg_t
)(cat2
->rel2
.src2
);
589 src2
.c
= cat2
->rel2
.src2_c
;
590 src2
.addr_rel
= true;
592 src2
.reg
= (reg_t
)(cat2
->src2
);
594 print_src(ctx
, &src2
);
599 static void print_instr_cat3(struct disasm_ctx
*ctx
, instr_t
*instr
)
601 instr_cat3_t
*cat3
= &instr
->cat3
;
602 bool full
= instr_cat3_full(cat3
);
604 fprintf(ctx
->out
, " ");
605 print_reg_dst(ctx
, (reg_t
)(cat3
->dst
), full
^ cat3
->dst_half
, false);
606 fprintf(ctx
->out
, ", ");
608 struct reginfo src1
= {
609 .r
= cat3
->repeat
? cat3
->src1_r
: 0,
611 .neg
= cat3
->src1_neg
,
613 if (cat3
->c1
.src1_c
) {
614 src1
.reg
= (reg_t
)(cat3
->c1
.src1
);
616 } else if (cat3
->rel1
.src1_rel
) {
617 src1
.reg
= (reg_t
)(cat3
->rel1
.src1
);
618 src1
.c
= cat3
->rel1
.src1_c
;
619 src1
.addr_rel
= true;
621 src1
.reg
= (reg_t
)(cat3
->src1
);
623 print_src(ctx
, &src1
);
625 fprintf(ctx
->out
, ", ");
626 struct reginfo src2
= {
627 .reg
= (reg_t
)cat3
->src2
,
629 .r
= cat3
->repeat
? cat3
->src2_r
: 0,
631 .neg
= cat3
->src2_neg
,
633 print_src(ctx
, &src2
);
635 fprintf(ctx
->out
, ", ");
636 struct reginfo src3
= {
639 .neg
= cat3
->src3_neg
,
641 if (cat3
->c2
.src3_c
) {
642 src3
.reg
= (reg_t
)(cat3
->c2
.src3
);
644 } else if (cat3
->rel2
.src3_rel
) {
645 src3
.reg
= (reg_t
)(cat3
->rel2
.src3
);
646 src3
.c
= cat3
->rel2
.src3_c
;
647 src3
.addr_rel
= true;
649 src3
.reg
= (reg_t
)(cat3
->src3
);
651 print_src(ctx
, &src3
);
654 static void print_instr_cat4(struct disasm_ctx
*ctx
, instr_t
*instr
)
656 instr_cat4_t
*cat4
= &instr
->cat4
;
658 fprintf(ctx
->out
, " ");
659 print_reg_dst(ctx
, (reg_t
)(cat4
->dst
), cat4
->full
^ cat4
->dst_half
, false);
660 fprintf(ctx
->out
, ", ");
662 struct reginfo src
= {
666 .neg
= cat4
->src_neg
,
667 .abs
= cat4
->src_abs
,
670 src
.reg
= (reg_t
)(cat4
->c
.src
);
672 } else if (cat4
->rel
.src_rel
) {
673 src
.reg
= (reg_t
)(cat4
->rel
.src
);
674 src
.c
= cat4
->rel
.src_c
;
677 src
.reg
= (reg_t
)(cat4
->src
);
679 print_src(ctx
, &src
);
681 if ((debug
& PRINT_VERBOSE
) && (cat4
->dummy1
|cat4
->dummy2
))
682 fprintf(ctx
->out
, "\t{4: %x,%x}", cat4
->dummy1
, cat4
->dummy2
);
685 static void print_instr_cat5(struct disasm_ctx
*ctx
, instr_t
*instr
)
687 static const struct {
688 bool src1
, src2
, samp
, tex
;
690 [opc_op(OPC_ISAM
)] = { true, false, true, true, },
691 [opc_op(OPC_ISAML
)] = { true, true, true, true, },
692 [opc_op(OPC_ISAMM
)] = { true, false, true, true, },
693 [opc_op(OPC_SAM
)] = { true, false, true, true, },
694 [opc_op(OPC_SAMB
)] = { true, true, true, true, },
695 [opc_op(OPC_SAML
)] = { true, true, true, true, },
696 [opc_op(OPC_SAMGQ
)] = { true, false, true, true, },
697 [opc_op(OPC_GETLOD
)] = { true, false, true, true, },
698 [opc_op(OPC_CONV
)] = { true, true, true, true, },
699 [opc_op(OPC_CONVM
)] = { true, true, true, true, },
700 [opc_op(OPC_GETSIZE
)] = { true, false, false, true, },
701 [opc_op(OPC_GETBUF
)] = { false, false, false, true, },
702 [opc_op(OPC_GETPOS
)] = { true, false, false, true, },
703 [opc_op(OPC_GETINFO
)] = { false, false, false, true, },
704 [opc_op(OPC_DSX
)] = { true, false, false, false, },
705 [opc_op(OPC_DSY
)] = { true, false, false, false, },
706 [opc_op(OPC_GATHER4R
)] = { true, false, true, true, },
707 [opc_op(OPC_GATHER4G
)] = { true, false, true, true, },
708 [opc_op(OPC_GATHER4B
)] = { true, false, true, true, },
709 [opc_op(OPC_GATHER4A
)] = { true, false, true, true, },
710 [opc_op(OPC_SAMGP0
)] = { true, false, true, true, },
711 [opc_op(OPC_SAMGP1
)] = { true, false, true, true, },
712 [opc_op(OPC_SAMGP2
)] = { true, false, true, true, },
713 [opc_op(OPC_SAMGP3
)] = { true, false, true, true, },
714 [opc_op(OPC_DSXPP_1
)] = { true, false, false, false, },
715 [opc_op(OPC_DSYPP_1
)] = { true, false, false, false, },
716 [opc_op(OPC_RGETPOS
)] = { true, false, false, false, },
717 [opc_op(OPC_RGETINFO
)] = { false, false, false, false, },
720 static const struct {
725 } desc_features
[8] = {
726 [CAT5_NONUNIFORM
] = { .indirect
= true, },
727 [CAT5_UNIFORM
] = { .indirect
= true, .uniform
= true, },
728 [CAT5_BINDLESS_IMM
] = { .bindless
= true, },
729 [CAT5_BINDLESS_UNIFORM
] = {
734 [CAT5_BINDLESS_NONUNIFORM
] = {
738 [CAT5_BINDLESS_A1_IMM
] = {
742 [CAT5_BINDLESS_A1_UNIFORM
] = {
748 [CAT5_BINDLESS_A1_NONUNIFORM
] = {
755 instr_cat5_t
*cat5
= &instr
->cat5
;
759 cat5
->is_s2en_bindless
&&
760 desc_features
[cat5
->s2en_bindless
.desc_mode
].indirect
;
762 cat5
->is_s2en_bindless
&&
763 desc_features
[cat5
->s2en_bindless
.desc_mode
].bindless
;
765 cat5
->is_s2en_bindless
&&
766 desc_features
[cat5
->s2en_bindless
.desc_mode
].use_a1
;
768 cat5
->is_s2en_bindless
&&
769 desc_features
[cat5
->s2en_bindless
.desc_mode
].uniform
;
771 if (cat5
->is_3d
) fprintf(ctx
->out
, ".3d");
772 if (cat5
->is_a
) fprintf(ctx
->out
, ".a");
773 if (cat5
->is_o
) fprintf(ctx
->out
, ".o");
774 if (cat5
->is_p
) fprintf(ctx
->out
, ".p");
775 if (cat5
->is_s
) fprintf(ctx
->out
, ".s");
776 if (desc_indirect
) fprintf(ctx
->out
, ".s2en");
777 if (uniform
) fprintf(ctx
->out
, ".uniform");
780 unsigned base
= (cat5
->s2en_bindless
.base_hi
<< 1) | cat5
->base_lo
;
781 fprintf(ctx
->out
, ".base%d", base
);
784 fprintf(ctx
->out
, " ");
786 switch (_OPC(5, cat5
->opc
)) {
791 fprintf(ctx
->out
, "(%s)", type
[cat5
->type
]);
795 fprintf(ctx
->out
, "(");
796 for (i
= 0; i
< 4; i
++)
797 if (cat5
->wrmask
& (1 << i
))
798 fprintf(ctx
->out
, "%c", "xyzw"[i
]);
799 fprintf(ctx
->out
, ")");
801 print_reg_dst(ctx
, (reg_t
)(cat5
->dst
), type_size(cat5
->type
) == 32, false);
803 if (info
[cat5
->opc
].src1
) {
804 fprintf(ctx
->out
, ", ");
805 struct reginfo src
= { .reg
= (reg_t
)(cat5
->src1
), .full
= cat5
->full
};
806 print_src(ctx
, &src
);
809 if (cat5
->is_o
|| info
[cat5
->opc
].src2
) {
810 fprintf(ctx
->out
, ", ");
811 struct reginfo src
= { .reg
= (reg_t
)(cat5
->src2
), .full
= cat5
->full
};
812 print_src(ctx
, &src
);
814 if (cat5
->is_s2en_bindless
) {
815 if (!desc_indirect
) {
816 if (info
[cat5
->opc
].samp
) {
818 fprintf(ctx
->out
, ", s#%d", cat5
->s2en_bindless
.src3
);
820 fprintf(ctx
->out
, ", s#%d", cat5
->s2en_bindless
.src3
& 0xf);
823 if (info
[cat5
->opc
].tex
&& !use_a1
) {
824 fprintf(ctx
->out
, ", t#%d", cat5
->s2en_bindless
.src3
>> 4);
828 if (info
[cat5
->opc
].samp
)
829 fprintf(ctx
->out
, ", s#%d", cat5
->norm
.samp
);
830 if (info
[cat5
->opc
].tex
)
831 fprintf(ctx
->out
, ", t#%d", cat5
->norm
.tex
);
835 fprintf(ctx
->out
, ", ");
836 struct reginfo src
= { .reg
= (reg_t
)(cat5
->s2en_bindless
.src3
), .full
= bindless
};
837 print_src(ctx
, &src
);
841 fprintf(ctx
->out
, ", a1.x");
843 if (debug
& PRINT_VERBOSE
) {
844 if (cat5
->is_s2en_bindless
) {
845 if ((debug
& PRINT_VERBOSE
) && cat5
->s2en_bindless
.dummy1
)
846 fprintf(ctx
->out
, "\t{5: %x}", cat5
->s2en_bindless
.dummy1
);
848 if ((debug
& PRINT_VERBOSE
) && cat5
->norm
.dummy1
)
849 fprintf(ctx
->out
, "\t{5: %x}", cat5
->norm
.dummy1
);
854 static void print_instr_cat6_a3xx(struct disasm_ctx
*ctx
, instr_t
*instr
)
856 instr_cat6_t
*cat6
= &instr
->cat6
;
857 char sd
= 0, ss
= 0; /* dst/src address space */
859 struct reginfo dst
, src1
, src2
, ssbo
;
862 memset(&dst
, 0, sizeof(dst
));
863 memset(&src1
, 0, sizeof(src1
));
864 memset(&src2
, 0, sizeof(src2
));
865 memset(&ssbo
, 0, sizeof(ssbo
));
867 switch (_OPC(6, cat6
->opc
)) {
870 dst
.full
= type_size(cat6
->type
) == 32;
871 src1
.full
= type_size(cat6
->type
) == 32;
872 src2
.full
= type_size(cat6
->type
) == 32;
885 dst
.full
= type_size(cat6
->type
) == 32;
886 src1
.full
= type_size(cat6
->type
) == 32;
887 src2
.full
= type_size(cat6
->type
) == 32;
890 dst
.full
= type_size(cat6
->type
) == 32;
896 switch (_OPC(6, cat6
->opc
)) {
900 fprintf(ctx
->out
, ".%dd", cat6
->ldgb
.d
+ 1);
903 fprintf(ctx
->out
, ".%s", cat6
->ldgb
.typed
? "typed" : "untyped");
904 fprintf(ctx
->out
, ".%dd", cat6
->ldgb
.d
+ 1);
905 fprintf(ctx
->out
, ".%s", type
[cat6
->type
]);
906 fprintf(ctx
->out
, ".%d", cat6
->ldgb
.type_size
+ 1);
910 fprintf(ctx
->out
, ".%s", cat6
->stgb
.typed
? "typed" : "untyped");
911 fprintf(ctx
->out
, ".%dd", cat6
->stgb
.d
+ 1);
912 fprintf(ctx
->out
, ".%s", type
[cat6
->type
]);
913 fprintf(ctx
->out
, ".%d", cat6
->stgb
.type_size
+ 1);
917 case OPC_ATOMIC_XCHG
:
920 case OPC_ATOMIC_CMPXCHG
:
926 ss
= cat6
->g
? 'g' : 'l';
927 fprintf(ctx
->out
, ".%s", cat6
->ldgb
.typed
? "typed" : "untyped");
928 fprintf(ctx
->out
, ".%dd", cat6
->ldgb
.d
+ 1);
929 fprintf(ctx
->out
, ".%s", type
[cat6
->type
]);
930 fprintf(ctx
->out
, ".%d", cat6
->ldgb
.type_size
+ 1);
931 fprintf(ctx
->out
, ".%c", ss
);
934 dst
.im
= cat6
->g
&& !cat6
->dst_off
;
935 fprintf(ctx
->out
, ".%s", type
[cat6
->type
]);
938 fprintf(ctx
->out
, " ");
940 switch (_OPC(6, cat6
->opc
)) {
981 if ((_OPC(6, cat6
->opc
) == OPC_STGB
) || (_OPC(6, cat6
->opc
) == OPC_STIB
)) {
984 memset(&src3
, 0, sizeof(src3
));
986 src1
.reg
= (reg_t
)(cat6
->stgb
.src1
);
987 src2
.reg
= (reg_t
)(cat6
->stgb
.src2
);
988 src2
.im
= cat6
->stgb
.src2_im
;
989 src3
.reg
= (reg_t
)(cat6
->stgb
.src3
);
990 src3
.im
= cat6
->stgb
.src3_im
;
993 fprintf(ctx
->out
, "g[%u], ", cat6
->stgb
.dst_ssbo
);
994 print_src(ctx
, &src1
);
995 fprintf(ctx
->out
, ", ");
996 print_src(ctx
, &src2
);
997 fprintf(ctx
->out
, ", ");
998 print_src(ctx
, &src3
);
1000 if (debug
& PRINT_VERBOSE
)
1001 fprintf(ctx
->out
, " (pad0=%x, pad3=%x)", cat6
->stgb
.pad0
, cat6
->stgb
.pad3
);
1006 if (is_atomic(_OPC(6, cat6
->opc
))) {
1008 src1
.reg
= (reg_t
)(cat6
->ldgb
.src1
);
1009 src1
.im
= cat6
->ldgb
.src1_im
;
1010 src2
.reg
= (reg_t
)(cat6
->ldgb
.src2
);
1011 src2
.im
= cat6
->ldgb
.src2_im
;
1012 dst
.reg
= (reg_t
)(cat6
->ldgb
.dst
);
1014 print_src(ctx
, &dst
);
1015 fprintf(ctx
->out
, ", ");
1017 struct reginfo src3
;
1018 memset(&src3
, 0, sizeof(src3
));
1020 src3
.reg
= (reg_t
)(cat6
->ldgb
.src3
);
1023 /* For images, the ".typed" variant is used and src2 is
1024 * the ivecN coordinates, ie ivec2 for 2d.
1026 * For SSBOs, the ".untyped" variant is used and src2 is
1027 * a simple dword offset.. src3 appears to be
1028 * uvec2(offset * 4, 0). Not sure the point of that.
1031 fprintf(ctx
->out
, "g[%u], ", cat6
->ldgb
.src_ssbo
);
1032 print_src(ctx
, &src1
); /* value */
1033 fprintf(ctx
->out
, ", ");
1034 print_src(ctx
, &src2
); /* offset/coords */
1035 fprintf(ctx
->out
, ", ");
1036 print_src(ctx
, &src3
); /* 64b byte offset.. */
1038 if (debug
& PRINT_VERBOSE
) {
1039 fprintf(ctx
->out
, " (pad0=%x, mustbe0=%x)", cat6
->ldgb
.pad0
,
1040 cat6
->ldgb
.mustbe0
);
1042 } else { /* ss == 'l' */
1043 fprintf(ctx
->out
, "l[");
1044 print_src(ctx
, &src1
); /* simple byte offset */
1045 fprintf(ctx
->out
, "], ");
1046 print_src(ctx
, &src2
); /* value */
1048 if (debug
& PRINT_VERBOSE
) {
1049 fprintf(ctx
->out
, " (src3=%x, pad0=%x, src_ssbo_im=%x, mustbe0=%x)",
1050 cat6
->ldgb
.src3
, cat6
->ldgb
.pad0
,
1051 cat6
->ldgb
.src_ssbo_im
, cat6
->ldgb
.mustbe0
);
1056 } else if (_OPC(6, cat6
->opc
) == OPC_RESINFO
) {
1057 dst
.reg
= (reg_t
)(cat6
->ldgb
.dst
);
1058 ssbo
.reg
= (reg_t
)(cat6
->ldgb
.src_ssbo
);
1059 ssbo
.im
= cat6
->ldgb
.src_ssbo_im
;
1061 print_src(ctx
, &dst
);
1062 fprintf(ctx
->out
, ", ");
1064 fprintf(ctx
->out
, "g[");
1065 print_src(ctx
, &ssbo
);
1066 fprintf(ctx
->out
, "]");
1069 } else if (_OPC(6, cat6
->opc
) == OPC_LDGB
) {
1071 src1
.reg
= (reg_t
)(cat6
->ldgb
.src1
);
1072 src1
.im
= cat6
->ldgb
.src1_im
;
1073 src2
.reg
= (reg_t
)(cat6
->ldgb
.src2
);
1074 src2
.im
= cat6
->ldgb
.src2_im
;
1075 ssbo
.reg
= (reg_t
)(cat6
->ldgb
.src_ssbo
);
1076 ssbo
.im
= cat6
->ldgb
.src_ssbo_im
;
1077 dst
.reg
= (reg_t
)(cat6
->ldgb
.dst
);
1079 print_src(ctx
, &dst
);
1080 fprintf(ctx
->out
, ", ");
1082 fprintf(ctx
->out
, "g[");
1083 print_src(ctx
, &ssbo
);
1084 fprintf(ctx
->out
, "], ");
1086 print_src(ctx
, &src1
);
1087 fprintf(ctx
->out
, ", ");
1088 print_src(ctx
, &src2
);
1090 if (debug
& PRINT_VERBOSE
)
1091 fprintf(ctx
->out
, " (pad0=%x, ssbo_im=%x, mustbe0=%x)", cat6
->ldgb
.pad0
, cat6
->ldgb
.src_ssbo_im
, cat6
->ldgb
.mustbe0
);
1094 } else if (_OPC(6, cat6
->opc
) == OPC_LDG
&& cat6
->a
.src1_im
&& cat6
->a
.src2_im
) {
1095 struct reginfo src3
;
1097 memset(&src3
, 0, sizeof(src3
));
1098 src1
.reg
= (reg_t
)(cat6
->a
.src1
);
1099 src2
.reg
= (reg_t
)(cat6
->a
.src2
);
1100 src2
.im
= cat6
->a
.src2_im
;
1101 src3
.reg
= (reg_t
)(cat6
->a
.off
);
1103 dst
.reg
= (reg_t
)(cat6
->d
.dst
);
1105 print_src(ctx
, &dst
);
1106 fprintf(ctx
->out
, ", g[");
1107 print_src(ctx
, &src1
);
1108 fprintf(ctx
->out
, "+");
1109 print_src(ctx
, &src3
);
1110 fprintf(ctx
->out
, "], ");
1111 print_src(ctx
, &src2
);
1116 if (cat6
->src_off
) {
1117 src1
.reg
= (reg_t
)(cat6
->a
.src1
);
1118 src1
.im
= cat6
->a
.src1_im
;
1119 src2
.reg
= (reg_t
)(cat6
->a
.src2
);
1120 src2
.im
= cat6
->a
.src2_im
;
1121 src1off
= cat6
->a
.off
;
1123 src1
.reg
= (reg_t
)(cat6
->b
.src1
);
1124 src1
.im
= cat6
->b
.src1_im
;
1125 src2
.reg
= (reg_t
)(cat6
->b
.src2
);
1126 src2
.im
= cat6
->b
.src2_im
;
1131 fprintf(ctx
->out
, "%c[", sd
);
1132 /* note: dst might actually be a src (ie. address to store to) */
1133 if (cat6
->dst_off
) {
1134 dst
.reg
= (reg_t
)(cat6
->c
.dst
);
1135 print_src(ctx
, &dst
);
1137 struct reginfo dstoff_reg
= {
1138 .reg
= (reg_t
) cat6
->c
.off
,
1141 fprintf(ctx
->out
, "+");
1142 print_src(ctx
, &dstoff_reg
);
1143 } else if (cat6
->c
.off
|| cat6
->c
.off_high
) {
1144 fprintf(ctx
->out
, "%+d", ((uint32_t)cat6
->c
.off_high
<< 8) | cat6
->c
.off
);
1147 dst
.reg
= (reg_t
)(cat6
->d
.dst
);
1148 print_src(ctx
, &dst
);
1151 fprintf(ctx
->out
, "]");
1152 fprintf(ctx
->out
, ", ");
1156 fprintf(ctx
->out
, "%c[", ss
);
1158 /* can have a larger than normal immed, so hack: */
1160 fprintf(ctx
->out
, "%u", src1
.reg
.dummy13
);
1162 print_src(ctx
, &src1
);
1165 if (cat6
->src_off
&& cat6
->g
)
1166 print_src(ctx
, &src2
);
1168 fprintf(ctx
->out
, "%+d", src1off
);
1170 fprintf(ctx
->out
, "]");
1172 switch (_OPC(6, cat6
->opc
)) {
1177 fprintf(ctx
->out
, ", ");
1178 print_src(ctx
, &src2
);
1183 static void print_instr_cat6_a6xx(struct disasm_ctx
*ctx
, instr_t
*instr
)
1185 instr_cat6_a6xx_t
*cat6
= &instr
->cat6_a6xx
;
1186 struct reginfo src1
, src2
, ssbo
;
1187 uint32_t opc
= _OPC(6, cat6
->opc
);
1188 bool uses_type
= opc
!= OPC_LDC
;
1190 static const struct {
1194 } desc_features
[8] = {
1202 [CAT6_NONUNIFORM
] = {
1204 .name
= "nonuniform"
1206 [CAT6_BINDLESS_IMM
] = {
1210 [CAT6_BINDLESS_UNIFORM
] = {
1215 [CAT6_BINDLESS_NONUNIFORM
] = {
1218 .name
= "nonuniform"
1222 bool indirect_ssbo
= desc_features
[cat6
->desc_mode
].indirect
;
1223 bool bindless
= desc_features
[cat6
->desc_mode
].bindless
;
1224 bool type_full
= cat6
->type
!= TYPE_U16
;
1227 memset(&src1
, 0, sizeof(src1
));
1228 memset(&src2
, 0, sizeof(src2
));
1229 memset(&ssbo
, 0, sizeof(ssbo
));
1232 fprintf(ctx
->out
, ".%s", cat6
->typed
? "typed" : "untyped");
1233 fprintf(ctx
->out
, ".%dd", cat6
->d
+ 1);
1234 fprintf(ctx
->out
, ".%s", type
[cat6
->type
]);
1236 fprintf(ctx
->out
, ".offset%d", cat6
->d
);
1238 fprintf(ctx
->out
, ".%u", cat6
->type_size
+ 1);
1240 fprintf(ctx
->out
, ".%s", desc_features
[cat6
->desc_mode
].name
);
1242 fprintf(ctx
->out
, ".base%d", cat6
->base
);
1243 fprintf(ctx
->out
, " ");
1245 src2
.reg
= (reg_t
)(cat6
->src2
);
1246 src2
.full
= type_full
;
1247 print_src(ctx
, &src2
);
1248 fprintf(ctx
->out
, ", ");
1250 if (opc
!= OPC_RESINFO
) {
1251 src1
.reg
= (reg_t
)(cat6
->src1
);
1252 src1
.full
= true; // XXX
1253 print_src(ctx
, &src1
);
1254 fprintf(ctx
->out
, ", ");
1257 ssbo
.reg
= (reg_t
)(cat6
->ssbo
);
1258 ssbo
.im
= !indirect_ssbo
;
1260 print_src(ctx
, &ssbo
);
1262 if (debug
& PRINT_VERBOSE
) {
1263 fprintf(ctx
->out
, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
1264 cat6
->pad1
, cat6
->pad2
, cat6
->pad3
, cat6
->pad4
, cat6
->pad5
);
1268 static void print_instr_cat6(struct disasm_ctx
*ctx
, instr_t
*instr
)
1270 if (!is_cat6_legacy(instr
, ctx
->gpu_id
)) {
1271 print_instr_cat6_a6xx(ctx
, instr
);
1272 if (debug
& PRINT_VERBOSE
)
1273 fprintf(ctx
->out
, " NEW");
1275 print_instr_cat6_a3xx(ctx
, instr
);
1276 if (debug
& PRINT_VERBOSE
)
1277 fprintf(ctx
->out
, " LEGACY");
1280 static void print_instr_cat7(struct disasm_ctx
*ctx
, instr_t
*instr
)
1282 instr_cat7_t
*cat7
= &instr
->cat7
;
1285 fprintf(ctx
->out
, ".g");
1287 fprintf(ctx
->out
, ".l");
1289 if (_OPC(7, cat7
->opc
) == OPC_FENCE
) {
1291 fprintf(ctx
->out
, ".r");
1293 fprintf(ctx
->out
, ".w");
1297 /* size of largest OPC field of all the instruction categories: */
/* Opcode description table.
 *
 * Each OPC(cat, opc, name) line below expands to a designated initializer
 * at index (opc) holding four values: the category number, the opcode
 * value, the stringified mnemonic (#name) and the category printer
 * print_instr_cat<cat>.  The array has 1 << (3 + NOPC_BITS) slots; any
 * slot that no OPC() line names is left zero-initialized.
 */
1300 static const struct opc_info
{
/* category-specific printer, called by print_single_instr() after the
 * mnemonic has been written */
1304 void (*print
)(struct disasm_ctx
*ctx
, instr_t
*instr
);
1305 } opcs
[1 << (3+NOPC_BITS
)] = {
1306 #define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
/* category 0 entries */
1308 OPC(0, OPC_NOP
, nop
),
1310 OPC(0, OPC_JUMP
, jump
),
1311 OPC(0, OPC_CALL
, call
),
1312 OPC(0, OPC_RET
, ret
),
1313 OPC(0, OPC_KILL
, kill
),
1314 OPC(0, OPC_END
, end
),
1315 OPC(0, OPC_EMIT
, emit
),
1316 OPC(0, OPC_CUT
, cut
),
1317 OPC(0, OPC_CHMASK
, chmask
),
1318 OPC(0, OPC_CHSH
, chsh
),
1319 OPC(0, OPC_FLOW_REV
, flow_rev
),
1320 OPC(0, OPC_PREDT
, predt
),
1321 OPC(0, OPC_PREDF
, predf
),
1322 OPC(0, OPC_PREDE
, prede
),
1323 OPC(0, OPC_BKT
, bkt
),
1324 OPC(0, OPC_STKS
, stks
),
1325 OPC(0, OPC_STKR
, stkr
),
1326 OPC(0, OPC_XSET
, xset
),
1327 OPC(0, OPC_XCLR
, xclr
),
1328 OPC(0, OPC_GETONE
, getone
),
1329 OPC(0, OPC_DBG
, dbg
),
1330 OPC(0, OPC_SHPS
, shps
),
1331 OPC(0, OPC_SHPE
, shpe
),
/* category 2 entries */
1337 OPC(2, OPC_ADD_F
, add
.f
),
1338 OPC(2, OPC_MIN_F
, min
.f
),
1339 OPC(2, OPC_MAX_F
, max
.f
),
1340 OPC(2, OPC_MUL_F
, mul
.f
),
1341 OPC(2, OPC_SIGN_F
, sign
.f
),
1342 OPC(2, OPC_CMPS_F
, cmps
.f
),
1343 OPC(2, OPC_ABSNEG_F
, absneg
.f
),
1344 OPC(2, OPC_CMPV_F
, cmpv
.f
),
1345 OPC(2, OPC_FLOOR_F
, floor
.f
),
1346 OPC(2, OPC_CEIL_F
, ceil
.f
),
1347 OPC(2, OPC_RNDNE_F
, rndne
.f
),
1348 OPC(2, OPC_RNDAZ_F
, rndaz
.f
),
1349 OPC(2, OPC_TRUNC_F
, trunc
.f
),
1350 OPC(2, OPC_ADD_U
, add
.u
),
1351 OPC(2, OPC_ADD_S
, add
.s
),
1352 OPC(2, OPC_SUB_U
, sub
.u
),
1353 OPC(2, OPC_SUB_S
, sub
.s
),
1354 OPC(2, OPC_CMPS_U
, cmps
.u
),
1355 OPC(2, OPC_CMPS_S
, cmps
.s
),
1356 OPC(2, OPC_MIN_U
, min
.u
),
1357 OPC(2, OPC_MIN_S
, min
.s
),
1358 OPC(2, OPC_MAX_U
, max
.u
),
1359 OPC(2, OPC_MAX_S
, max
.s
),
1360 OPC(2, OPC_ABSNEG_S
, absneg
.s
),
1361 OPC(2, OPC_AND_B
, and.b
),
1362 OPC(2, OPC_OR_B
, or.b
),
1363 OPC(2, OPC_NOT_B
, not.b
),
1364 OPC(2, OPC_XOR_B
, xor.b
),
1365 OPC(2, OPC_CMPV_U
, cmpv
.u
),
1366 OPC(2, OPC_CMPV_S
, cmpv
.s
),
1367 OPC(2, OPC_MUL_U24
, mul
.u24
),
1368 OPC(2, OPC_MUL_S24
, mul
.s24
),
1369 OPC(2, OPC_MULL_U
, mull
.u
),
1370 OPC(2, OPC_BFREV_B
, bfrev
.b
),
1371 OPC(2, OPC_CLZ_S
, clz
.s
),
1372 OPC(2, OPC_CLZ_B
, clz
.b
),
1373 OPC(2, OPC_SHL_B
, shl
.b
),
1374 OPC(2, OPC_SHR_B
, shr
.b
),
1375 OPC(2, OPC_ASHR_B
, ashr
.b
),
1376 OPC(2, OPC_BARY_F
, bary
.f
),
1377 OPC(2, OPC_MGEN_B
, mgen
.b
),
1378 OPC(2, OPC_GETBIT_B
, getbit
.b
),
1379 OPC(2, OPC_SETRM
, setrm
),
1380 OPC(2, OPC_CBITS_B
, cbits
.b
),
1381 OPC(2, OPC_SHB
, shb
),
1382 OPC(2, OPC_MSAD
, msad
),
/* category 3 entries */
1385 OPC(3, OPC_MAD_U16
, mad
.u16
),
1386 OPC(3, OPC_MADSH_U16
, madsh
.u16
),
1387 OPC(3, OPC_MAD_S16
, mad
.s16
),
1388 OPC(3, OPC_MADSH_M16
, madsh
.m16
),
1389 OPC(3, OPC_MAD_U24
, mad
.u24
),
1390 OPC(3, OPC_MAD_S24
, mad
.s24
),
1391 OPC(3, OPC_MAD_F16
, mad
.f16
),
1392 OPC(3, OPC_MAD_F32
, mad
.f32
),
1393 OPC(3, OPC_SEL_B16
, sel
.b16
),
1394 OPC(3, OPC_SEL_B32
, sel
.b32
),
1395 OPC(3, OPC_SEL_S16
, sel
.s16
),
1396 OPC(3, OPC_SEL_S32
, sel
.s32
),
1397 OPC(3, OPC_SEL_F16
, sel
.f16
),
1398 OPC(3, OPC_SEL_F32
, sel
.f32
),
1399 OPC(3, OPC_SAD_S16
, sad
.s16
),
1400 OPC(3, OPC_SAD_S32
, sad
.s32
),
/* category 4 entries */
1403 OPC(4, OPC_RCP
, rcp
),
1404 OPC(4, OPC_RSQ
, rsq
),
1405 OPC(4, OPC_LOG2
, log2
),
1406 OPC(4, OPC_EXP2
, exp2
),
1407 OPC(4, OPC_SIN
, sin
),
1408 OPC(4, OPC_COS
, cos
),
1409 OPC(4, OPC_SQRT
, sqrt
),
1410 OPC(4, OPC_HRSQ
, hrsq
),
1411 OPC(4, OPC_HLOG2
, hlog2
),
1412 OPC(4, OPC_HEXP2
, hexp2
),
/* category 5 entries */
1415 OPC(5, OPC_ISAM
, isam
),
1416 OPC(5, OPC_ISAML
, isaml
),
1417 OPC(5, OPC_ISAMM
, isamm
),
1418 OPC(5, OPC_SAM
, sam
),
1419 OPC(5, OPC_SAMB
, samb
),
1420 OPC(5, OPC_SAML
, saml
),
1421 OPC(5, OPC_SAMGQ
, samgq
),
1422 OPC(5, OPC_GETLOD
, getlod
),
1423 OPC(5, OPC_CONV
, conv
),
1424 OPC(5, OPC_CONVM
, convm
),
1425 OPC(5, OPC_GETSIZE
, getsize
),
1426 OPC(5, OPC_GETBUF
, getbuf
),
1427 OPC(5, OPC_GETPOS
, getpos
),
1428 OPC(5, OPC_GETINFO
, getinfo
),
1429 OPC(5, OPC_DSX
, dsx
),
1430 OPC(5, OPC_DSY
, dsy
),
1431 OPC(5, OPC_GATHER4R
, gather4r
),
1432 OPC(5, OPC_GATHER4G
, gather4g
),
1433 OPC(5, OPC_GATHER4B
, gather4b
),
1434 OPC(5, OPC_GATHER4A
, gather4a
),
1435 OPC(5, OPC_SAMGP0
, samgp0
),
1436 OPC(5, OPC_SAMGP1
, samgp1
),
1437 OPC(5, OPC_SAMGP2
, samgp2
),
1438 OPC(5, OPC_SAMGP3
, samgp3
),
1439 OPC(5, OPC_DSXPP_1
, dsxpp
.1),
1440 OPC(5, OPC_DSYPP_1
, dsypp
.1),
1441 OPC(5, OPC_RGETPOS
, rgetpos
),
1442 OPC(5, OPC_RGETINFO
, rgetinfo
),
1443 /* macros are needed here for ir3_print */
1444 OPC(5, OPC_DSXPP_MACRO
, dsxpp
.macro
),
1445 OPC(5, OPC_DSYPP_MACRO
, dsypp
.macro
),
/* category 6 entries */
1449 OPC(6, OPC_LDG
, ldg
),
1450 OPC(6, OPC_LDL
, ldl
),
1451 OPC(6, OPC_LDP
, ldp
),
1452 OPC(6, OPC_STG
, stg
),
1453 OPC(6, OPC_STL
, stl
),
1454 OPC(6, OPC_STP
, stp
),
1455 OPC(6, OPC_LDIB
, ldib
),
1456 OPC(6, OPC_G2L
, g2l
),
1457 OPC(6, OPC_L2G
, l2g
),
1458 OPC(6, OPC_PREFETCH
, prefetch
),
1459 OPC(6, OPC_LDLW
, ldlw
),
1460 OPC(6, OPC_STLW
, stlw
),
1461 OPC(6, OPC_RESFMT
, resfmt
),
1462 OPC(6, OPC_RESINFO
, resinfo
),
1463 OPC(6, OPC_ATOMIC_ADD
, atomic
.add
),
1464 OPC(6, OPC_ATOMIC_SUB
, atomic
.sub
),
1465 OPC(6, OPC_ATOMIC_XCHG
, atomic
.xchg
),
1466 OPC(6, OPC_ATOMIC_INC
, atomic
.inc
),
1467 OPC(6, OPC_ATOMIC_DEC
, atomic
.dec
),
1468 OPC(6, OPC_ATOMIC_CMPXCHG
, atomic
.cmpxchg
),
1469 OPC(6, OPC_ATOMIC_MIN
, atomic
.min
),
1470 OPC(6, OPC_ATOMIC_MAX
, atomic
.max
),
1471 OPC(6, OPC_ATOMIC_AND
, atomic
.and),
1472 OPC(6, OPC_ATOMIC_OR
, atomic
.or),
1473 OPC(6, OPC_ATOMIC_XOR
, atomic
.xor),
1474 OPC(6, OPC_LDGB
, ldgb
),
1475 OPC(6, OPC_STGB
, stgb
),
1476 OPC(6, OPC_STIB
, stib
),
1477 OPC(6, OPC_LDC
, ldc
),
1478 OPC(6, OPC_LDLV
, ldlv
),
/* category 7 entries */
1480 OPC(7, OPC_BAR
, bar
),
1481 OPC(7, OPC_FENCE
, fence
),
/* Yields a pointer to the opcs[] entry for an instruction.  The index is
 * (opc_cat << NOPC_BITS) | instr_opc(...).
 *
 * NOTE: the expansion reads ctx->gpu_id, so a variable named ctx must be
 * in scope wherever this macro is used; it is not a macro parameter.
 */
1486 #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
/* Return the mnemonic string recorded in opcs[] for opc.
 *
 * When opc_cat(opc) is -1 a fixed placeholder string is returned instead.
 * Otherwise opc is used directly as the table index, without a range
 * check, and the result is whatever name the slot holds.
 */
1488 const char *disasm_a3xx_instr_name(opc_t opc
)
1490 if (opc_cat(opc
) == -1) return "??meta??";
1491 return opcs
[opc
].name
;
/* Print the mnemonic and operands of one instruction to ctx->out.
 *
 * The opcs[] entry is looked up with GETINFO().  On one path the entry
 * name is printed and the entry print callback is invoked.  On the other
 * path an unknown(cat,opc) marker is printed and the printer is chosen
 * directly from instr->opc_cat.
 *
 * NOTE(review): the condition selecting between the two paths is not
 * visible here; presumably it tests whether name is non-NULL - confirm.
 */
1494 static void print_single_instr(struct disasm_ctx
*ctx
, instr_t
*instr
)
1496 const char *name
= GETINFO(instr
)->name
;
1497 uint32_t opc
= instr_opc(instr
, ctx
->gpu_id
);
/* table path: mnemonic first, then the category printer from the table */
1500 fprintf(ctx
->out
, "%s", name
);
1501 GETINFO(instr
)->print(ctx
, instr
);
/* fallback path: report the raw category and opcode numbers */
1503 fprintf(ctx
->out
, "unknown(%d,%d)", instr
->opc_cat
, opc
);
/* still print the operands, using the printer for the category */
1505 switch (instr
->opc_cat
) {
1506 case 0: print_instr_cat0(ctx
, instr
); break;
1507 case 1: print_instr_cat1(ctx
, instr
); break;
1508 case 2: print_instr_cat2(ctx
, instr
); break;
1509 case 3: print_instr_cat3(ctx
, instr
); break;
1510 case 4: print_instr_cat4(ctx
, instr
); break;
1511 case 5: print_instr_cat5(ctx
, instr
); break;
1512 case 6: print_instr_cat6(ctx
, instr
); break;
1513 case 7: print_instr_cat7(ctx
, instr
); break;
/* Print one instruction and account for it in ctx->stats.
 *
 * dwords points at the two dwords of the instruction (reinterpreted as an
 * instr_t); n is the instruction index shown in the line prefix.
 *
 * Steps, in order:
 *  - with PRINT_RAW set, emit a prefix holding the level tag, category,
 *    index, running cycle number and both raw dwords (dwords[1] first);
 *  - for OPC_BARY_F, store the current instruction count in
 *    stats->last_baryf;
 *  - add 1 + repeat to stats->instructions and one to stats->instlen;
 *  - print the flag markers (sy), (ss), (jp), (eq), (sat), (rptN), (nopN)
 *    and (ul); for categories 2 and 3 the nop count is computed from the
 *    src1_r / src2_r bits as src2_r * 2 + src1_r;
 *  - add the nop count to stats->instructions, stats->nops and
 *    instrs_per_cat[0], and update the per-category counters plus
 *    mov_count / cov_count for OPC_MOV;
 *  - print the instruction itself, then call process_reg_dst();
 *  - for categories 0-4 with EXPAND_REPEAT set, write one nop line per
 *    counted nop and print the instruction again once per repeat, with
 *    ctx->repeatidx running from 1 to repeat.
 *
 * Returns true for a category 0 instruction whose opcode is OPC_END or
 * OPC_CHSH.
 *
 * NOTE(review): several of the conditions guarding these steps are not
 * visible here; confirm the exact branch structure before relying on the
 * summary above.
 */
1518 static bool print_instr(struct disasm_ctx
*ctx
, uint32_t *dwords
, int n
)
1520 instr_t
*instr
= (instr_t
*)dwords
;
1521 opc_t opc
= _OPC(instr
->opc_cat
, instr_opc(instr
, ctx
->gpu_id
));
/* snapshot taken before the counters below are advanced; only used for
 * the cycle number shown in the line prefixes */
1523 unsigned cycles
= ctx
->stats
->instructions
;
1525 if (debug
& PRINT_RAW
) {
1526 fprintf(ctx
->out
, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels
[ctx
->level
],
1527 instr
->opc_cat
, n
, cycles
++, dwords
[1], dwords
[0]);
1530 if (opc
== OPC_BARY_F
)
1531 ctx
->stats
->last_baryf
= ctx
->stats
->instructions
;
/* a repeated instruction counts as 1 + repeat executed instructions but
 * only one encoded instruction (instlen) */
1533 ctx
->repeat
= instr_repeat(instr
);
1534 ctx
->stats
->instructions
+= 1 + ctx
->repeat
;
1535 ctx
->stats
->instlen
++;
1537 /* NOTE: order flags are printed is a bit fugly.. but for now I
1538 * try to match the order in llvm-a3xx disassembler for easy
1543 fprintf(ctx
->out
, "(sy)");
1546 if (instr
->ss
&& ((instr
->opc_cat
<= 4) || (instr
->opc_cat
== 7))) {
1547 fprintf(ctx
->out
, "(ss)");
1551 fprintf(ctx
->out
, "(jp)");
1552 if ((instr
->opc_cat
== 0) && instr
->cat0
.eq
)
1553 fprintf(ctx
->out
, "(eq)");
1554 if (instr_sat(instr
))
1555 fprintf(ctx
->out
, "(sat)");
1557 fprintf(ctx
->out
, "(rpt%d)", ctx
->repeat
);
1558 else if ((instr
->opc_cat
== 2) && (instr
->cat2
.src1_r
|| instr
->cat2
.src2_r
))
1559 nop
= (instr
->cat2
.src2_r
* 2) + instr
->cat2
.src1_r
;
1560 else if ((instr
->opc_cat
== 3) && (instr
->cat3
.src1_r
|| instr
->cat3
.src2_r
))
1561 nop
= (instr
->cat3
.src2_r
* 2) + instr
->cat3
.src1_r
;
1563 fprintf(ctx
->out
, "(nop%d) ", nop
);
1565 if (instr
->ul
&& ((2 <= instr
->opc_cat
) && (instr
->opc_cat
<= 4)))
1566 fprintf(ctx
->out
, "(ul)");
1568 ctx
->stats
->instructions
+= nop
;
1569 ctx
->stats
->nops
+= nop
;
1570 if (opc
== OPC_NOP
) {
1571 ctx
->stats
->nops
+= 1 + ctx
->repeat
;
1572 ctx
->stats
->instrs_per_cat
[0] += 1 + ctx
->repeat
;
1574 ctx
->stats
->instrs_per_cat
[instr
->opc_cat
] += 1 + ctx
->repeat
;
1575 ctx
->stats
->instrs_per_cat
[0] += nop
;
1578 if (opc
== OPC_MOV
) {
1579 if (instr
->cat1
.src_type
== instr
->cat1
.dst_type
) {
1580 ctx
->stats
->mov_count
+= 1 + ctx
->repeat
;
1582 ctx
->stats
->cov_count
+= 1 + ctx
->repeat
;
1586 print_single_instr(ctx
, instr
);
1587 fprintf(ctx
->out
, "\n");
1589 process_reg_dst(ctx
);
1591 if ((instr
->opc_cat
<= 4) && (debug
& EXPAND_REPEAT
)) {
1593 for (i
= 0; i
< nop
; i
++) {
1594 if (debug
& PRINT_VERBOSE
) {
1595 fprintf(ctx
->out
, "%s:%d:%04d:%04d[ ] ",
1596 levels
[ctx
->level
], instr
->opc_cat
, n
, cycles
++);
1598 fprintf(ctx
->out
, "nop\n");
1600 for (i
= 0; i
< ctx
->repeat
; i
++) {
1601 ctx
->repeatidx
= i
+ 1;
1602 if (debug
& PRINT_VERBOSE
) {
1603 fprintf(ctx
->out
, "%s:%d:%04d:%04d[ ] ",
1604 levels
[ctx
->level
], instr
->opc_cat
, n
, cycles
++);
1606 print_single_instr(ctx
, instr
);
1607 fprintf(ctx
->out
, "\n");
1612 return (instr
->opc_cat
== 0) &&
1613 ((opc
== OPC_END
) || (opc
== OPC_CHSH
));
/* Disassemble a shader and collect statistics about it.
 *
 * dwords, sizedwords: the instruction stream.  sizedwords must be even
 *     (asserted) because the loop consumes two dwords per instruction.
 * gpu_id: stored in the context; for values >= 600 the used / rbw / war
 *     register masks are flagged as mergedregs.
 * level, out, stats: NOTE(review): presumably copied into the context
 *     (ctx.level, ctx.out, ctx.stats) before use - those assignments are
 *     not visible here, confirm.
 *
 * The stats structure reached through ctx.stats is zeroed before the loop.
 * With PRINT_STATS set, print_reg_stats() is called after the loop.
 */
1616 int disasm_a3xx_stat(uint32_t *dwords
, int sizedwords
, int level
, FILE *out
,
1617 unsigned gpu_id
, struct shader_stats
*stats
)
1619 struct disasm_ctx ctx
;
1622 bool has_end
= false;
1624 ir3_assert((sizedwords
% 2) == 0);
/* start from an all-zero context */
1626 memset(&ctx
, 0, sizeof(ctx
));
1629 ctx
.gpu_id
= gpu_id
;
1631 if (gpu_id
>= 600) {
1632 ctx
.regs
.used
.mergedregs
= true;
1633 ctx
.regs
.rbw
.mergedregs
= true;
1634 ctx
.regs
.war
.mergedregs
= true;
1636 memset(ctx
.stats
, 0, sizeof(*ctx
.stats
));
/* one instruction is two dwords; i / 2 is the instruction index */
1638 for (i
= 0; i
< sizedwords
; i
+= 2) {
/* latches to true once print_instr() reports an OPC_END / OPC_CHSH */
1639 has_end
|= print_instr(&ctx
, &dwords
[i
], i
/2);
/* all-zero instruction.  NOTE(review): presumably this ends the loop once
 * has_end is set - the surrounding control flow is not visible, confirm */
1642 if (dwords
[i
] == 0 && dwords
[i
+ 1] == 0)
1650 if (debug
& PRINT_STATS
)
1651 print_reg_stats(&ctx
);
/* Takes a set of debug_t flags.
 *
 * NOTE(review): presumably stores d in the file-scope debug variable that
 * the print routines test (PRINT_RAW, PRINT_VERBOSE, EXPAND_REPEAT,
 * PRINT_STATS) - the body is not visible here, confirm.
 */
1656 void disasm_a3xx_set_debug(enum debug_t d
)
/* State shared between TRY() and the assert handler: jmp_env is the
 * setjmp target armed by TRY(), and jmp_env_valid is set to true by TRY()
 * after arming it and set back to false by TRY() afterwards. */
1663 static bool jmp_env_valid
;
1664 static jmp_buf jmp_env
;
/* Assertion-failure hook.
 *
 * Writes a message of the form file:line: func: Assertion ... failed. to
 * stdout, then transfers control to the setjmp point in jmp_env with
 * value 1.
 *
 * NOTE(review): the longjmp is only meaningful while TRY() has armed
 * jmp_env; presumably it is guarded by jmp_env_valid with another action
 * otherwise - the guard is not visible here, confirm.
 */
1667 ir3_assert_handler(const char *expr
, const char *file
, int line
,
1670 fprintf(stdout
, "\n%s:%u: %s: Assertion `%s' failed.\n", file
, line
, func
, expr
);
1672 longjmp(jmp_env
, 1);
/* TRY(x): run x with the assert handler able to jump back here.
 *
 * Asserts that no TRY() is already active (so it cannot be nested), arms
 * jmp_env with setjmp(), and sets jmp_env_valid to true on the direct
 * (zero) return of setjmp.  jmp_env_valid is reset to false afterwards.
 *
 * NOTE(review): presumably x is evaluated inside the setjmp == 0 branch -
 * that line is not visible here, confirm.
 */
1676 #define TRY(x) do { \
1677 assert(!jmp_env_valid); \
1678 if (setjmp(jmp_env) == 0) { \
1679 jmp_env_valid = true; \
1682 jmp_env_valid = false; \
/* Convenience wrapper around disasm_a3xx_stat() for callers that do not
 * need the statistics: they are collected into a local shader_stats that
 * is discarded on return.  All other arguments are passed through
 * unchanged and the result of disasm_a3xx_stat() is returned.
 */
1686 int disasm_a3xx(uint32_t *dwords
, int sizedwords
, int level
, FILE *out
, unsigned gpu_id
)
1688 struct shader_stats stats
;
1689 return disasm_a3xx_stat(dwords
, sizedwords
, level
, out
, gpu_id
, &stats
);
1692 int try_disasm_a3xx(uint32_t *dwords
, int sizedwords
, int level
, FILE *out
, unsigned gpu_id
)
1694 struct shader_stats stats
;
1696 TRY(ret
= disasm_a3xx_stat(dwords
, sizedwords
, level
, out
, gpu_id
, &stats
));