2555c739082fd3613c7c8205009af5719d0872ca
[mesa.git] / src / freedreno / ir3 / disasm-a3xx.c
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include <util/u_debug.h>
32
33 #include "disasm.h"
34 #include "instr-a3xx.h"
35 #include "regmask.h"
36
37 static enum debug_t debug;
38
39 #define printf debug_printf
40
/* Indentation prefix per nesting level, indexed by ctx->level.  Levels 0-9
 * map to that many tabs; levels 10-15 map to a literal "x" placeholder.
 */
static const char *levels[] = {
	[0]  = "",
	[1]  = "\t",
	[2]  = "\t\t",
	[3]  = "\t\t\t",
	[4]  = "\t\t\t\t",
	[5]  = "\t\t\t\t\t",
	[6]  = "\t\t\t\t\t\t",
	[7]  = "\t\t\t\t\t\t\t",
	[8]  = "\t\t\t\t\t\t\t\t",
	[9]  = "\t\t\t\t\t\t\t\t\t",
	[10] = "x",
	[11] = "x",
	[12] = "x",
	[13] = "x",
	[14] = "x",
	[15] = "x",
};
59
/* Component letter lookup: index 0..3 of a register's .comp field -> x/y/z/w. */
static const char *component = "xyzw";
61
62 static const char *type[] = {
63 [TYPE_F16] = "f16",
64 [TYPE_F32] = "f32",
65 [TYPE_U16] = "u16",
66 [TYPE_U32] = "u32",
67 [TYPE_S16] = "s16",
68 [TYPE_S32] = "s32",
69 [TYPE_U8] = "u8",
70 [TYPE_S8] = "s8",
71 };
72
73 struct disasm_ctx {
74 FILE *out;
75 int level;
76 unsigned gpu_id;
77
78 struct shader_stats *stats;
79
80 /* we have to process the dst register after src to avoid tripping up
81 * the read-before-write detection
82 */
83 unsigned last_dst;
84 bool last_dst_full;
85 bool last_dst_valid;
86
87 /* current instruction repeat flag: */
88 unsigned repeat;
89 /* current instruction repeat indx/offset (for --expand): */
90 unsigned repeatidx;
91
92 /* tracking for register usage */
93 struct {
94 regmask_t used;
95 regmask_t used_merged;
96 regmask_t rbw; /* read before write */
97 regmask_t war; /* write after read */
98 unsigned max_const;
99 } regs;
100 };
101
/* Symbolic names printed by print_reg() for small immediate values on
 * full-precision float operands; the immediate value is the table index.
 */
static const char *float_imms[] = {
	[0]  = "0.0",
	[1]  = "0.5",
	[2]  = "1.0",
	[3]  = "2.0",
	[4]  = "e",
	[5]  = "pi",
	[6]  = "1/pi",
	[7]  = "1/log2(e)",
	[8]  = "log2(e)",
	[9]  = "1/log2(10)",
	[10] = "log2(10)",
	[11] = "4.0",
};
116
117 static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
118 bool is_float, bool r,
119 bool c, bool im, bool neg, bool abs, bool addr_rel)
120 {
121 const char type = c ? 'c' : 'r';
122
123 // XXX I prefer - and || for neg/abs, but preserving format used
124 // by libllvm-a3xx for easy diffing..
125
126 if (abs && neg)
127 fprintf(ctx->out, "(absneg)");
128 else if (neg)
129 fprintf(ctx->out, "(neg)");
130 else if (abs)
131 fprintf(ctx->out, "(abs)");
132
133 if (r)
134 fprintf(ctx->out, "(r)");
135
136 if (im) {
137 if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) {
138 fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]);
139 } else {
140 fprintf(ctx->out, "%d", reg.iim_val);
141 }
142 } else if (addr_rel) {
143 /* I would just use %+d but trying to make it diff'able with
144 * libllvm-a3xx...
145 */
146 if (reg.iim_val < 0)
147 fprintf(ctx->out, "%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
148 else if (reg.iim_val > 0)
149 fprintf(ctx->out, "%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
150 else
151 fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
152 } else if ((reg.num == REG_A0) && !c) {
153 /* This matches libllvm output, the second (scalar) address register
154 * seems to be called a1.x instead of a0.y.
155 */
156 fprintf(ctx->out, "a%d.x", reg.comp);
157 } else if ((reg.num == REG_P0) && !c) {
158 fprintf(ctx->out, "p0.%c", component[reg.comp]);
159 } else {
160 fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
161 if (0 && full && !c) {
162 reg_t hr0 = reg;
163 hr0.iim_val *= 2;
164 reg_t hr1 = hr0;
165 hr1.iim_val += 1;
166 fprintf(ctx->out, " (hr%d.%c,hr%d.%c)", hr0.num, component[hr0.comp], hr1.num, component[hr1.comp]);
167 }
168 }
169 }
170
171 static void regmask_set(regmask_t *regmask, unsigned num, bool full)
172 {
173 ir3_assert(num < MAX_REG);
174 __regmask_set(regmask, !full, num);
175 }
176
177 static void regmask_clear(regmask_t *regmask, unsigned num, bool full)
178 {
179 ir3_assert(num < MAX_REG);
180 __regmask_clear(regmask, !full, num);
181 }
182
183 static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
184 {
185 ir3_assert(num < MAX_REG);
186 return __regmask_get(regmask, !full, num);
187 }
188
189 static unsigned regidx(reg_t reg)
190 {
191 return (4 * reg.num) + reg.comp;
192 }
193
194 static reg_t idxreg(unsigned idx)
195 {
196 return (reg_t){
197 .comp = idx & 0x3,
198 .num = idx >> 2,
199 };
200 }
201
202 static void print_sequence(struct disasm_ctx *ctx, int first, int last)
203 {
204 if (first != MAX_REG) {
205 if (first == last) {
206 fprintf(ctx->out, " %d", first);
207 } else {
208 fprintf(ctx->out, " %d-%d", first, last);
209 }
210 }
211 }
212
213 static int print_regs(struct disasm_ctx *ctx, regmask_t *regmask, bool full)
214 {
215 int num, max = 0, cnt = 0;
216 int first, last;
217
218 first = last = MAX_REG;
219
220 for (num = 0; num < MAX_REG; num++) {
221 if (regmask_get(regmask, num, full)) {
222 if (num != (last + 1)) {
223 print_sequence(ctx, first, last);
224 first = num;
225 }
226 last = num;
227 if (num < (48*4))
228 max = num;
229 cnt++;
230 }
231 }
232
233 print_sequence(ctx, first, last);
234
235 fprintf(ctx->out, " (cnt=%d, max=%d)", cnt, max);
236
237 return max;
238 }
239
240 static void print_reg_stats(struct disasm_ctx *ctx)
241 {
242 int fullreg, halfreg;
243
244 fprintf(ctx->out, "%sRegister Stats:\n", levels[ctx->level]);
245 fprintf(ctx->out, "%s- used (half):", levels[ctx->level]);
246 halfreg = print_regs(ctx, &ctx->regs.used, false);
247 fprintf(ctx->out, "\n");
248 fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
249 fullreg = print_regs(ctx, &ctx->regs.used, true);
250 fprintf(ctx->out, "\n");
251 if (ctx->gpu_id >= 600) {
252 fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
253 print_regs(ctx, &ctx->regs.used_merged, false);
254 fprintf(ctx->out, "\n");
255 }
256 fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
257 print_regs(ctx, &ctx->regs.rbw, false);
258 fprintf(ctx->out, "\n");
259 fprintf(ctx->out, "%s- input (full):", levels[ctx->level]);
260 print_regs(ctx, &ctx->regs.rbw, true);
261 fprintf(ctx->out, "\n");
262 fprintf(ctx->out, "%s- max const: %u\n", levels[ctx->level], ctx->regs.max_const);
263 fprintf(ctx->out, "\n");
264 fprintf(ctx->out, "%s- output (half):", levels[ctx->level]);
265 print_regs(ctx, &ctx->regs.war, false);
266 fprintf(ctx->out, " (estimated)\n");
267 fprintf(ctx->out, "%s- output (full):", levels[ctx->level]);
268 print_regs(ctx, &ctx->regs.war, true);
269 fprintf(ctx->out, " (estimated)\n");
270
271 /* convert to vec4, which is the granularity that registers are
272 * assigned to shader:
273 */
274 fullreg = (fullreg + 3) / 4;
275 halfreg = (halfreg + 3) / 4;
276
277 // Note this count of instructions includes rptN, which matches
278 // up to how mesa prints this:
279 fprintf(ctx->out, "%s- shaderdb: %d instructions, %d nops, %d non-nops, "
280 "(%d instlen), %u last-baryf, %d half, %d full\n",
281 levels[ctx->level], ctx->stats->instructions, ctx->stats->nops,
282 ctx->stats->instructions - ctx->stats->nops, ctx->stats->instlen,
283 ctx->stats->last_baryf, halfreg, fullreg);
284 fprintf(ctx->out, "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, "
285 "%u cat4, %u cat5, %u cat6, %u cat7\n",
286 levels[ctx->level],
287 ctx->stats->instrs_per_cat[0],
288 ctx->stats->instrs_per_cat[1],
289 ctx->stats->instrs_per_cat[2],
290 ctx->stats->instrs_per_cat[3],
291 ctx->stats->instrs_per_cat[4],
292 ctx->stats->instrs_per_cat[5],
293 ctx->stats->instrs_per_cat[6],
294 ctx->stats->instrs_per_cat[7]);
295 fprintf(ctx->out, "%s- shaderdb: %d (ss), %d (sy)\n", levels[ctx->level],
296 ctx->stats->ss, ctx->stats->sy);
297 }
298
299 static void process_reg_dst(struct disasm_ctx *ctx)
300 {
301 if (!ctx->last_dst_valid)
302 return;
303
304 /* ignore dummy writes (ie. r63.x): */
305 if (!VALIDREG(ctx->last_dst))
306 return;
307
308 for (unsigned i = 0; i <= ctx->repeat; i++) {
309 unsigned dst = ctx->last_dst + i;
310
311 regmask_set(&ctx->regs.war, dst, ctx->last_dst_full);
312 regmask_set(&ctx->regs.used, dst, ctx->last_dst_full);
313
314 if (ctx->gpu_id >= 600) {
315 if (ctx->last_dst_full) {
316 regmask_set(&ctx->regs.used_merged, (dst*2)+0, false);
317 regmask_set(&ctx->regs.used_merged, (dst*2)+1, false);
318 } else {
319 regmask_set(&ctx->regs.used_merged, dst, false);
320 }
321 }
322 }
323
324 ctx->last_dst_valid = false;
325 }
326 static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
327 {
328 /* presumably the special registers a0.c and p0.c don't count.. */
329 if (!(addr_rel || (reg.num == REG_A0) || (reg.num == REG_P0))) {
330 ctx->last_dst = regidx(reg);
331 ctx->last_dst_full = full;
332 ctx->last_dst_valid = true;
333 }
334 reg = idxreg(regidx(reg) + ctx->repeatidx);
335 print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
336 }
337
/* TODO switch to using the reginfo struct everywhere, since it is more
 * readable than passing a bunch of bools to print_reg()
 */
341
342 struct reginfo {
343 reg_t reg;
344 bool full;
345 bool r;
346 bool c;
347 bool f; /* src reg is interpreted as float, used for printing immediates */
348 bool im;
349 bool neg;
350 bool abs;
351 bool addr_rel;
352 };
353
354 static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
355 {
356 reg_t reg = info->reg;
357
358 /* presumably the special registers a0.c and p0.c don't count.. */
359 if (!(info->addr_rel || info->c || info->im ||
360 (reg.num == REG_A0) || (reg.num == REG_P0))) {
361 int i, num = regidx(reg);
362 for (i = 0; i <= ctx->repeat; i++) {
363 unsigned src = num + i;
364
365 if (!regmask_get(&ctx->regs.used, src, info->full))
366 regmask_set(&ctx->regs.rbw, src, info->full);
367
368 regmask_clear(&ctx->regs.war, src, info->full);
369 regmask_set(&ctx->regs.used, src, info->full);
370
371 if (info->full) {
372 regmask_set(&ctx->regs.used_merged, (src*2)+0, false);
373 regmask_set(&ctx->regs.used_merged, (src*2)+1, false);
374 } else {
375 regmask_set(&ctx->regs.used_merged, src, false);
376 }
377
378 if (!info->r)
379 break;
380 }
381 } else if (info->c) {
382 int i, num = regidx(reg);
383 for (i = 0; i <= ctx->repeat; i++) {
384 unsigned src = num + i;
385
386 ctx->regs.max_const = MAX2(ctx->regs.max_const, src);
387
388 if (!info->r)
389 break;
390 }
391
392 unsigned max = (num + ctx->repeat + 1 + 3) / 4;
393 if (max > ctx->stats->constlen)
394 ctx->stats->constlen = max;
395 }
396
397 if (info->r)
398 reg = idxreg(regidx(info->reg) + ctx->repeatidx);
399
400 print_reg(ctx, reg, info->full, info->f, info->r, info->c, info->im,
401 info->neg, info->abs, info->addr_rel);
402 }
403
404 //static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
405 //{
406 // print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
407 //}
408
409 static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
410 {
411 static const struct {
412 const char *suffix;
413 int nsrc;
414 bool idx;
415 } brinfo[7] = {
416 [BRANCH_PLAIN] = { "r", 1, false },
417 [BRANCH_OR] = { "rao", 2, false },
418 [BRANCH_AND] = { "raa", 2, false },
419 [BRANCH_CONST] = { "rac", 0, true },
420 [BRANCH_ANY] = { "any", 1, false },
421 [BRANCH_ALL] = { "all", 1, false },
422 [BRANCH_X] = { "rax", 0, false },
423 };
424 instr_cat0_t *cat0 = &instr->cat0;
425
426 switch (instr_opc(instr, ctx->gpu_id)) {
427 case OPC_KILL:
428 case OPC_PREDT:
429 case OPC_PREDF:
430 fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "",
431 component[cat0->comp0]);
432 break;
433 case OPC_B:
434 fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix);
435 if (brinfo[cat0->brtype].idx) {
436 fprintf(ctx->out, ".%u", cat0->idx);
437 }
438 if (brinfo[cat0->brtype].nsrc >= 1) {
439 fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "",
440 component[cat0->comp0]);
441 }
442 if (brinfo[cat0->brtype].nsrc >= 2) {
443 fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "",
444 component[cat0->comp1]);
445 }
446 fprintf(ctx->out, " #%d", cat0->a3xx.immed);
447 break;
448 case OPC_JUMP:
449 case OPC_CALL:
450 case OPC_BKT:
451 case OPC_GETONE:
452 case OPC_SHPS:
453 fprintf(ctx->out, " #%d", cat0->a3xx.immed);
454 break;
455 }
456
457 if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4))
458 fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4);
459 }
460
461 static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
462 {
463 instr_cat1_t *cat1 = &instr->cat1;
464
465 if (cat1->ul)
466 fprintf(ctx->out, "(ul)");
467
468 if (cat1->src_type == cat1->dst_type) {
469 if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
470 /* special case (nmemonic?): */
471 fprintf(ctx->out, "mova");
472 } else {
473 fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
474 }
475 } else {
476 fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
477 }
478
479 fprintf(ctx->out, " ");
480
481 if (cat1->even)
482 fprintf(ctx->out, "(even)");
483
484 if (cat1->pos_inf)
485 fprintf(ctx->out, "(pos_infinity)");
486
487 print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
488 cat1->dst_rel);
489
490 fprintf(ctx->out, ", ");
491
492 /* ugg, have to special case this.. vs print_reg().. */
493 if (cat1->src_im) {
494 if (type_float(cat1->src_type))
495 fprintf(ctx->out, "(%f)", cat1->fim_val);
496 else if (type_uint(cat1->src_type))
497 fprintf(ctx->out, "0x%08x", cat1->uim_val);
498 else
499 fprintf(ctx->out, "%d", cat1->iim_val);
500 } else if (cat1->src_rel && !cat1->src_c) {
501 /* I would just use %+d but trying to make it diff'able with
502 * libllvm-a3xx...
503 */
504 char type = cat1->src_rel_c ? 'c' : 'r';
505 const char *full = (type_size(cat1->src_type) == 32) ? "" : "h";
506 if (cat1->off < 0)
507 fprintf(ctx->out, "%s%c<a0.x - %d>", full, type, -cat1->off);
508 else if (cat1->off > 0)
509 fprintf(ctx->out, "%s%c<a0.x + %d>", full, type, cat1->off);
510 else
511 fprintf(ctx->out, "%s%c<a0.x>", full, type);
512 } else {
513 struct reginfo src = {
514 .reg = (reg_t)cat1->src,
515 .full = type_size(cat1->src_type) == 32,
516 .r = cat1->src_r,
517 .c = cat1->src_c,
518 .im = cat1->src_im,
519 };
520 print_src(ctx, &src);
521 }
522
523 if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
524 fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
525 }
526
527 static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
528 {
529 instr_cat2_t *cat2 = &instr->cat2;
530 int opc = _OPC(2, cat2->opc);
531 static const char *cond[] = {
532 "lt",
533 "le",
534 "gt",
535 "ge",
536 "eq",
537 "ne",
538 "?6?",
539 };
540
541 switch (opc) {
542 case OPC_CMPS_F:
543 case OPC_CMPS_U:
544 case OPC_CMPS_S:
545 case OPC_CMPV_F:
546 case OPC_CMPV_U:
547 case OPC_CMPV_S:
548 fprintf(ctx->out, ".%s", cond[cat2->cond]);
549 break;
550 }
551
552 fprintf(ctx->out, " ");
553 if (cat2->ei)
554 fprintf(ctx->out, "(ei)");
555 print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
556 fprintf(ctx->out, ", ");
557
558 struct reginfo src1 = {
559 .full = cat2->full,
560 .r = cat2->repeat ? cat2->src1_r : 0,
561 .f = is_cat2_float(opc),
562 .im = cat2->src1_im,
563 .abs = cat2->src1_abs,
564 .neg = cat2->src1_neg,
565 };
566
567 if (cat2->c1.src1_c) {
568 src1.reg = (reg_t)(cat2->c1.src1);
569 src1.c = true;
570 } else if (cat2->rel1.src1_rel) {
571 src1.reg = (reg_t)(cat2->rel1.src1);
572 src1.c = cat2->rel1.src1_c;
573 src1.addr_rel = true;
574 } else {
575 src1.reg = (reg_t)(cat2->src1);
576 }
577 print_src(ctx, &src1);
578
579 struct reginfo src2 = {
580 .r = cat2->repeat ? cat2->src2_r : 0,
581 .full = cat2->full,
582 .f = is_cat2_float(opc),
583 .abs = cat2->src2_abs,
584 .neg = cat2->src2_neg,
585 .im = cat2->src2_im,
586 };
587 switch (opc) {
588 case OPC_ABSNEG_F:
589 case OPC_ABSNEG_S:
590 case OPC_CLZ_B:
591 case OPC_CLZ_S:
592 case OPC_SIGN_F:
593 case OPC_FLOOR_F:
594 case OPC_CEIL_F:
595 case OPC_RNDNE_F:
596 case OPC_RNDAZ_F:
597 case OPC_TRUNC_F:
598 case OPC_NOT_B:
599 case OPC_BFREV_B:
600 case OPC_SETRM:
601 case OPC_CBITS_B:
602 /* these only have one src reg */
603 break;
604 default:
605 fprintf(ctx->out, ", ");
606 if (cat2->c2.src2_c) {
607 src2.reg = (reg_t)(cat2->c2.src2);
608 src2.c = true;
609 } else if (cat2->rel2.src2_rel) {
610 src2.reg = (reg_t)(cat2->rel2.src2);
611 src2.c = cat2->rel2.src2_c;
612 src2.addr_rel = true;
613 } else {
614 src2.reg = (reg_t)(cat2->src2);
615 }
616 print_src(ctx, &src2);
617 break;
618 }
619 }
620
621 static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
622 {
623 instr_cat3_t *cat3 = &instr->cat3;
624 bool full = instr_cat3_full(cat3);
625
626 fprintf(ctx->out, " ");
627 print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
628 fprintf(ctx->out, ", ");
629
630 struct reginfo src1 = {
631 .r = cat3->repeat ? cat3->src1_r : 0,
632 .full = full,
633 .neg = cat3->src1_neg,
634 };
635 if (cat3->c1.src1_c) {
636 src1.reg = (reg_t)(cat3->c1.src1);
637 src1.c = true;
638 } else if (cat3->rel1.src1_rel) {
639 src1.reg = (reg_t)(cat3->rel1.src1);
640 src1.c = cat3->rel1.src1_c;
641 src1.addr_rel = true;
642 } else {
643 src1.reg = (reg_t)(cat3->src1);
644 }
645 print_src(ctx, &src1);
646
647 fprintf(ctx->out, ", ");
648 struct reginfo src2 = {
649 .reg = (reg_t)cat3->src2,
650 .full = full,
651 .r = cat3->repeat ? cat3->src2_r : 0,
652 .c = cat3->src2_c,
653 .neg = cat3->src2_neg,
654 };
655 print_src(ctx, &src2);
656
657 fprintf(ctx->out, ", ");
658 struct reginfo src3 = {
659 .r = cat3->src3_r,
660 .full = full,
661 .neg = cat3->src3_neg,
662 };
663 if (cat3->c2.src3_c) {
664 src3.reg = (reg_t)(cat3->c2.src3);
665 src3.c = true;
666 } else if (cat3->rel2.src3_rel) {
667 src3.reg = (reg_t)(cat3->rel2.src3);
668 src3.c = cat3->rel2.src3_c;
669 src3.addr_rel = true;
670 } else {
671 src3.reg = (reg_t)(cat3->src3);
672 }
673 print_src(ctx, &src3);
674 }
675
676 static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
677 {
678 instr_cat4_t *cat4 = &instr->cat4;
679
680 fprintf(ctx->out, " ");
681 print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
682 fprintf(ctx->out, ", ");
683
684 struct reginfo src = {
685 .r = cat4->src_r,
686 .im = cat4->src_im,
687 .full = cat4->full,
688 .neg = cat4->src_neg,
689 .abs = cat4->src_abs,
690 };
691 if (cat4->c.src_c) {
692 src.reg = (reg_t)(cat4->c.src);
693 src.c = true;
694 } else if (cat4->rel.src_rel) {
695 src.reg = (reg_t)(cat4->rel.src);
696 src.c = cat4->rel.src_c;
697 src.addr_rel = true;
698 } else {
699 src.reg = (reg_t)(cat4->src);
700 }
701 print_src(ctx, &src);
702
703 if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
704 fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
705 }
706
707 static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
708 {
709 static const struct {
710 bool src1, src2, samp, tex;
711 } info[0x1f] = {
712 [opc_op(OPC_ISAM)] = { true, false, true, true, },
713 [opc_op(OPC_ISAML)] = { true, true, true, true, },
714 [opc_op(OPC_ISAMM)] = { true, false, true, true, },
715 [opc_op(OPC_SAM)] = { true, false, true, true, },
716 [opc_op(OPC_SAMB)] = { true, true, true, true, },
717 [opc_op(OPC_SAML)] = { true, true, true, true, },
718 [opc_op(OPC_SAMGQ)] = { true, false, true, true, },
719 [opc_op(OPC_GETLOD)] = { true, false, true, true, },
720 [opc_op(OPC_CONV)] = { true, true, true, true, },
721 [opc_op(OPC_CONVM)] = { true, true, true, true, },
722 [opc_op(OPC_GETSIZE)] = { true, false, false, true, },
723 [opc_op(OPC_GETBUF)] = { false, false, false, true, },
724 [opc_op(OPC_GETPOS)] = { true, false, false, true, },
725 [opc_op(OPC_GETINFO)] = { false, false, false, true, },
726 [opc_op(OPC_DSX)] = { true, false, false, false, },
727 [opc_op(OPC_DSY)] = { true, false, false, false, },
728 [opc_op(OPC_GATHER4R)] = { true, false, true, true, },
729 [opc_op(OPC_GATHER4G)] = { true, false, true, true, },
730 [opc_op(OPC_GATHER4B)] = { true, false, true, true, },
731 [opc_op(OPC_GATHER4A)] = { true, false, true, true, },
732 [opc_op(OPC_SAMGP0)] = { true, false, true, true, },
733 [opc_op(OPC_SAMGP1)] = { true, false, true, true, },
734 [opc_op(OPC_SAMGP2)] = { true, false, true, true, },
735 [opc_op(OPC_SAMGP3)] = { true, false, true, true, },
736 [opc_op(OPC_DSXPP_1)] = { true, false, false, false, },
737 [opc_op(OPC_DSYPP_1)] = { true, false, false, false, },
738 [opc_op(OPC_RGETPOS)] = { true, false, false, false, },
739 [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
740 };
741
742 static const struct {
743 bool indirect;
744 bool bindless;
745 bool use_a1;
746 bool uniform;
747 } desc_features[8] = {
748 [CAT5_NONUNIFORM] = { .indirect = true, },
749 [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
750 [CAT5_BINDLESS_IMM] = { .bindless = true, },
751 [CAT5_BINDLESS_UNIFORM] = {
752 .bindless = true,
753 .indirect = true,
754 .uniform = true,
755 },
756 [CAT5_BINDLESS_NONUNIFORM] = {
757 .bindless = true,
758 .indirect = true,
759 },
760 [CAT5_BINDLESS_A1_IMM] = {
761 .bindless = true,
762 .use_a1 = true,
763 },
764 [CAT5_BINDLESS_A1_UNIFORM] = {
765 .bindless = true,
766 .indirect = true,
767 .uniform = true,
768 .use_a1 = true,
769 },
770 [CAT5_BINDLESS_A1_NONUNIFORM] = {
771 .bindless = true,
772 .indirect = true,
773 .use_a1 = true,
774 },
775 };
776
777 instr_cat5_t *cat5 = &instr->cat5;
778 int i;
779
780 bool desc_indirect =
781 cat5->is_s2en_bindless &&
782 desc_features[cat5->s2en_bindless.desc_mode].indirect;
783 bool bindless =
784 cat5->is_s2en_bindless &&
785 desc_features[cat5->s2en_bindless.desc_mode].bindless;
786 bool use_a1 =
787 cat5->is_s2en_bindless &&
788 desc_features[cat5->s2en_bindless.desc_mode].use_a1;
789 bool uniform =
790 cat5->is_s2en_bindless &&
791 desc_features[cat5->s2en_bindless.desc_mode].uniform;
792
793 if (cat5->is_3d) fprintf(ctx->out, ".3d");
794 if (cat5->is_a) fprintf(ctx->out, ".a");
795 if (cat5->is_o) fprintf(ctx->out, ".o");
796 if (cat5->is_p) fprintf(ctx->out, ".p");
797 if (cat5->is_s) fprintf(ctx->out, ".s");
798 if (desc_indirect) fprintf(ctx->out, ".s2en");
799 if (uniform) fprintf(ctx->out, ".uniform");
800
801 if (bindless) {
802 unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
803 fprintf(ctx->out, ".base%d", base);
804 }
805
806 fprintf(ctx->out, " ");
807
808 switch (_OPC(5, cat5->opc)) {
809 case OPC_DSXPP_1:
810 case OPC_DSYPP_1:
811 break;
812 default:
813 fprintf(ctx->out, "(%s)", type[cat5->type]);
814 break;
815 }
816
817 fprintf(ctx->out, "(");
818 for (i = 0; i < 4; i++)
819 if (cat5->wrmask & (1 << i))
820 fprintf(ctx->out, "%c", "xyzw"[i]);
821 fprintf(ctx->out, ")");
822
823 print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
824
825 if (info[cat5->opc].src1) {
826 fprintf(ctx->out, ", ");
827 struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full };
828 print_src(ctx, &src);
829 }
830
831 if (cat5->is_o || info[cat5->opc].src2) {
832 fprintf(ctx->out, ", ");
833 struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full };
834 print_src(ctx, &src);
835 }
836 if (cat5->is_s2en_bindless) {
837 if (!desc_indirect) {
838 if (info[cat5->opc].samp) {
839 if (use_a1)
840 fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
841 else
842 fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
843 }
844
845 if (info[cat5->opc].tex && !use_a1) {
846 fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
847 }
848 }
849 } else {
850 if (info[cat5->opc].samp)
851 fprintf(ctx->out, ", s#%d", cat5->norm.samp);
852 if (info[cat5->opc].tex)
853 fprintf(ctx->out, ", t#%d", cat5->norm.tex);
854 }
855
856 if (desc_indirect) {
857 fprintf(ctx->out, ", ");
858 struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless };
859 print_src(ctx, &src);
860 }
861
862 if (use_a1)
863 fprintf(ctx->out, ", a1.x");
864
865 if (debug & PRINT_VERBOSE) {
866 if (cat5->is_s2en_bindless) {
867 if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
868 fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
869 } else {
870 if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
871 fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
872 }
873 }
874 }
875
876 static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
877 {
878 instr_cat6_t *cat6 = &instr->cat6;
879 char sd = 0, ss = 0; /* dst/src address space */
880 bool nodst = false;
881 struct reginfo dst, src1, src2, ssbo;
882 int src1off = 0;
883
884 memset(&dst, 0, sizeof(dst));
885 memset(&src1, 0, sizeof(src1));
886 memset(&src2, 0, sizeof(src2));
887 memset(&ssbo, 0, sizeof(ssbo));
888
889 switch (_OPC(6, cat6->opc)) {
890 case OPC_RESINFO:
891 case OPC_RESFMT:
892 dst.full = type_size(cat6->type) == 32;
893 src1.full = type_size(cat6->type) == 32;
894 src2.full = type_size(cat6->type) == 32;
895 break;
896 case OPC_L2G:
897 case OPC_G2L:
898 dst.full = true;
899 src1.full = true;
900 src2.full = true;
901 break;
902 case OPC_STG:
903 case OPC_STL:
904 case OPC_STP:
905 case OPC_STLW:
906 case OPC_STIB:
907 dst.full = type_size(cat6->type) == 32;
908 src1.full = type_size(cat6->type) == 32;
909 src2.full = type_size(cat6->type) == 32;
910 break;
911 default:
912 dst.full = type_size(cat6->type) == 32;
913 src1.full = true;
914 src2.full = true;
915 break;
916 }
917
918 switch (_OPC(6, cat6->opc)) {
919 case OPC_PREFETCH:
920 break;
921 case OPC_RESINFO:
922 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
923 break;
924 case OPC_LDGB:
925 fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
926 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
927 fprintf(ctx->out, ".%s", type[cat6->type]);
928 fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
929 break;
930 case OPC_STGB:
931 case OPC_STIB:
932 fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
933 fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
934 fprintf(ctx->out, ".%s", type[cat6->type]);
935 fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
936 break;
937 case OPC_ATOMIC_ADD:
938 case OPC_ATOMIC_SUB:
939 case OPC_ATOMIC_XCHG:
940 case OPC_ATOMIC_INC:
941 case OPC_ATOMIC_DEC:
942 case OPC_ATOMIC_CMPXCHG:
943 case OPC_ATOMIC_MIN:
944 case OPC_ATOMIC_MAX:
945 case OPC_ATOMIC_AND:
946 case OPC_ATOMIC_OR:
947 case OPC_ATOMIC_XOR:
948 ss = cat6->g ? 'g' : 'l';
949 fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
950 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
951 fprintf(ctx->out, ".%s", type[cat6->type]);
952 fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
953 fprintf(ctx->out, ".%c", ss);
954 break;
955 default:
956 dst.im = cat6->g && !cat6->dst_off;
957 fprintf(ctx->out, ".%s", type[cat6->type]);
958 break;
959 }
960 fprintf(ctx->out, " ");
961
962 switch (_OPC(6, cat6->opc)) {
963 case OPC_STG:
964 sd = 'g';
965 break;
966 case OPC_STP:
967 sd = 'p';
968 break;
969 case OPC_STL:
970 case OPC_STLW:
971 sd = 'l';
972 break;
973
974 case OPC_LDG:
975 case OPC_LDC:
976 ss = 'g';
977 break;
978 case OPC_LDP:
979 ss = 'p';
980 break;
981 case OPC_LDL:
982 case OPC_LDLW:
983 case OPC_LDLV:
984 ss = 'l';
985 break;
986
987 case OPC_L2G:
988 ss = 'l';
989 sd = 'g';
990 break;
991
992 case OPC_G2L:
993 ss = 'g';
994 sd = 'l';
995 break;
996
997 case OPC_PREFETCH:
998 ss = 'g';
999 nodst = true;
1000 break;
1001 }
1002
1003 if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
1004 struct reginfo src3;
1005
1006 memset(&src3, 0, sizeof(src3));
1007
1008 src1.reg = (reg_t)(cat6->stgb.src1);
1009 src2.reg = (reg_t)(cat6->stgb.src2);
1010 src2.im = cat6->stgb.src2_im;
1011 src3.reg = (reg_t)(cat6->stgb.src3);
1012 src3.im = cat6->stgb.src3_im;
1013 src3.full = true;
1014
1015 fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
1016 print_src(ctx, &src1);
1017 fprintf(ctx->out, ", ");
1018 print_src(ctx, &src2);
1019 fprintf(ctx->out, ", ");
1020 print_src(ctx, &src3);
1021
1022 if (debug & PRINT_VERBOSE)
1023 fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
1024
1025 return;
1026 }
1027
1028 if (is_atomic(_OPC(6, cat6->opc))) {
1029
1030 src1.reg = (reg_t)(cat6->ldgb.src1);
1031 src1.im = cat6->ldgb.src1_im;
1032 src2.reg = (reg_t)(cat6->ldgb.src2);
1033 src2.im = cat6->ldgb.src2_im;
1034 dst.reg = (reg_t)(cat6->ldgb.dst);
1035
1036 print_src(ctx, &dst);
1037 fprintf(ctx->out, ", ");
1038 if (ss == 'g') {
1039 struct reginfo src3;
1040 memset(&src3, 0, sizeof(src3));
1041
1042 src3.reg = (reg_t)(cat6->ldgb.src3);
1043 src3.full = true;
1044
1045 /* For images, the ".typed" variant is used and src2 is
1046 * the ivecN coordinates, ie ivec2 for 2d.
1047 *
1048 * For SSBOs, the ".untyped" variant is used and src2 is
1049 * a simple dword offset.. src3 appears to be
1050 * uvec2(offset * 4, 0). Not sure the point of that.
1051 */
1052
1053 fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
1054 print_src(ctx, &src1); /* value */
1055 fprintf(ctx->out, ", ");
1056 print_src(ctx, &src2); /* offset/coords */
1057 fprintf(ctx->out, ", ");
1058 print_src(ctx, &src3); /* 64b byte offset.. */
1059
1060 if (debug & PRINT_VERBOSE) {
1061 fprintf(ctx->out, " (pad0=%x, mustbe0=%x)", cat6->ldgb.pad0,
1062 cat6->ldgb.mustbe0);
1063 }
1064 } else { /* ss == 'l' */
1065 fprintf(ctx->out, "l[");
1066 print_src(ctx, &src1); /* simple byte offset */
1067 fprintf(ctx->out, "], ");
1068 print_src(ctx, &src2); /* value */
1069
1070 if (debug & PRINT_VERBOSE) {
1071 fprintf(ctx->out, " (src3=%x, pad0=%x, src_ssbo_im=%x, mustbe0=%x)",
1072 cat6->ldgb.src3, cat6->ldgb.pad0,
1073 cat6->ldgb.src_ssbo_im, cat6->ldgb.mustbe0);
1074 }
1075 }
1076
1077 return;
1078 } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
1079 dst.reg = (reg_t)(cat6->ldgb.dst);
1080 ssbo.reg = (reg_t)(cat6->ldgb.src_ssbo);
1081 ssbo.im = cat6->ldgb.src_ssbo_im;
1082
1083 print_src(ctx, &dst);
1084 fprintf(ctx->out, ", ");
1085
1086 fprintf(ctx->out, "g[");
1087 print_src(ctx, &ssbo);
1088 fprintf(ctx->out, "]");
1089
1090 return;
1091 } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
1092
1093 src1.reg = (reg_t)(cat6->ldgb.src1);
1094 src1.im = cat6->ldgb.src1_im;
1095 src2.reg = (reg_t)(cat6->ldgb.src2);
1096 src2.im = cat6->ldgb.src2_im;
1097 ssbo.reg = (reg_t)(cat6->ldgb.src_ssbo);
1098 ssbo.im = cat6->ldgb.src_ssbo_im;
1099 dst.reg = (reg_t)(cat6->ldgb.dst);
1100
1101 print_src(ctx, &dst);
1102 fprintf(ctx->out, ", ");
1103
1104 fprintf(ctx->out, "g[");
1105 print_src(ctx, &ssbo);
1106 fprintf(ctx->out, "], ");
1107
1108 print_src(ctx, &src1);
1109 fprintf(ctx->out, ", ");
1110 print_src(ctx, &src2);
1111
1112 if (debug & PRINT_VERBOSE)
1113 fprintf(ctx->out, " (pad0=%x, ssbo_im=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.src_ssbo_im, cat6->ldgb.mustbe0);
1114
1115 return;
1116 } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
1117 struct reginfo src3;
1118
1119 memset(&src3, 0, sizeof(src3));
1120 src1.reg = (reg_t)(cat6->a.src1);
1121 src2.reg = (reg_t)(cat6->a.src2);
1122 src2.im = cat6->a.src2_im;
1123 src3.reg = (reg_t)(cat6->a.off);
1124 src3.full = true;
1125 dst.reg = (reg_t)(cat6->d.dst);
1126
1127 print_src(ctx, &dst);
1128 fprintf(ctx->out, ", g[");
1129 print_src(ctx, &src1);
1130 fprintf(ctx->out, "+");
1131 print_src(ctx, &src3);
1132 fprintf(ctx->out, "], ");
1133 print_src(ctx, &src2);
1134
1135 return;
1136 }
1137
1138 if (cat6->src_off) {
1139 src1.reg = (reg_t)(cat6->a.src1);
1140 src1.im = cat6->a.src1_im;
1141 src2.reg = (reg_t)(cat6->a.src2);
1142 src2.im = cat6->a.src2_im;
1143 src1off = cat6->a.off;
1144 } else {
1145 src1.reg = (reg_t)(cat6->b.src1);
1146 src1.im = cat6->b.src1_im;
1147 src2.reg = (reg_t)(cat6->b.src2);
1148 src2.im = cat6->b.src2_im;
1149 }
1150
1151 if (!nodst) {
1152 if (sd)
1153 fprintf(ctx->out, "%c[", sd);
1154 /* note: dst might actually be a src (ie. address to store to) */
1155 if (cat6->dst_off) {
1156 dst.reg = (reg_t)(cat6->c.dst);
1157 print_src(ctx, &dst);
1158 if (cat6->g) {
1159 struct reginfo dstoff_reg = {
1160 .reg = (reg_t) cat6->c.off,
1161 .full = true
1162 };
1163 fprintf(ctx->out, "+");
1164 print_src(ctx, &dstoff_reg);
1165 } else if (cat6->c.off || cat6->c.off_high) {
1166 fprintf(ctx->out, "%+d", ((uint32_t)cat6->c.off_high << 8) | cat6->c.off);
1167 }
1168 } else {
1169 dst.reg = (reg_t)(cat6->d.dst);
1170 print_src(ctx, &dst);
1171 }
1172 if (sd)
1173 fprintf(ctx->out, "]");
1174 fprintf(ctx->out, ", ");
1175 }
1176
1177 if (ss)
1178 fprintf(ctx->out, "%c[", ss);
1179
1180 /* can have a larger than normal immed, so hack: */
1181 if (src1.im) {
1182 fprintf(ctx->out, "%u", src1.reg.dummy13);
1183 } else {
1184 print_src(ctx, &src1);
1185 }
1186
1187 if (cat6->src_off && cat6->g)
1188 print_src(ctx, &src2);
1189 else if (src1off)
1190 fprintf(ctx->out, "%+d", src1off);
1191 if (ss)
1192 fprintf(ctx->out, "]");
1193
1194 switch (_OPC(6, cat6->opc)) {
1195 case OPC_RESINFO:
1196 case OPC_RESFMT:
1197 break;
1198 default:
1199 fprintf(ctx->out, ", ");
1200 print_src(ctx, &src2);
1201 break;
1202 }
1203 }
1204
1205 static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
1206 {
1207 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
1208 struct reginfo src1, src2, ssbo;
1209 uint32_t opc = _OPC(6, cat6->opc);
1210 bool uses_type = opc != OPC_LDC;
1211
1212 static const struct {
1213 bool indirect;
1214 bool bindless;
1215 const char *name;
1216 } desc_features[8] = {
1217 [CAT6_IMM] = {
1218 .name = "imm"
1219 },
1220 [CAT6_UNIFORM] = {
1221 .indirect = true,
1222 .name = "uniform"
1223 },
1224 [CAT6_NONUNIFORM] = {
1225 .indirect = true,
1226 .name = "nonuniform"
1227 },
1228 [CAT6_BINDLESS_IMM] = {
1229 .bindless = true,
1230 .name = "imm"
1231 },
1232 [CAT6_BINDLESS_UNIFORM] = {
1233 .bindless = true,
1234 .indirect = true,
1235 .name = "uniform"
1236 },
1237 [CAT6_BINDLESS_NONUNIFORM] = {
1238 .bindless = true,
1239 .indirect = true,
1240 .name = "nonuniform"
1241 },
1242 };
1243
1244 bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
1245 bool bindless = desc_features[cat6->desc_mode].bindless;
1246 bool type_full = cat6->type != TYPE_U16;
1247
1248
1249 memset(&src1, 0, sizeof(src1));
1250 memset(&src2, 0, sizeof(src2));
1251 memset(&ssbo, 0, sizeof(ssbo));
1252
1253 if (uses_type) {
1254 fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
1255 fprintf(ctx->out, ".%dd", cat6->d + 1);
1256 fprintf(ctx->out, ".%s", type[cat6->type]);
1257 } else {
1258 fprintf(ctx->out, ".offset%d", cat6->d);
1259 }
1260 fprintf(ctx->out, ".%u", cat6->type_size + 1);
1261
1262 fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name);
1263 if (bindless)
1264 fprintf(ctx->out, ".base%d", cat6->base);
1265 fprintf(ctx->out, " ");
1266
1267 src2.reg = (reg_t)(cat6->src2);
1268 src2.full = type_full;
1269 print_src(ctx, &src2);
1270 fprintf(ctx->out, ", ");
1271
1272 if (opc != OPC_RESINFO) {
1273 src1.reg = (reg_t)(cat6->src1);
1274 src1.full = true; // XXX
1275 print_src(ctx, &src1);
1276 fprintf(ctx->out, ", ");
1277 }
1278
1279 ssbo.reg = (reg_t)(cat6->ssbo);
1280 ssbo.im = !indirect_ssbo;
1281 ssbo.full = true;
1282 print_src(ctx, &ssbo);
1283
1284 if (debug & PRINT_VERBOSE) {
1285 fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
1286 cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
1287 }
1288 }
1289
1290 static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
1291 {
1292 if (!is_cat6_legacy(instr, ctx->gpu_id)) {
1293 print_instr_cat6_a6xx(ctx, instr);
1294 if (debug & PRINT_VERBOSE)
1295 fprintf(ctx->out, " NEW");
1296 } else {
1297 print_instr_cat6_a3xx(ctx, instr);
1298 if (debug & PRINT_VERBOSE)
1299 fprintf(ctx->out, " LEGACY");
1300 }
1301 }
1302 static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
1303 {
1304 instr_cat7_t *cat7 = &instr->cat7;
1305
1306 if (cat7->g)
1307 fprintf(ctx->out, ".g");
1308 if (cat7->l)
1309 fprintf(ctx->out, ".l");
1310
1311 if (_OPC(7, cat7->opc) == OPC_FENCE) {
1312 if (cat7->r)
1313 fprintf(ctx->out, ".r");
1314 if (cat7->w)
1315 fprintf(ctx->out, ".w");
1316 }
1317 }
1318
1319 /* size of largest OPC field of all the instruction categories: */
1320 #define NOPC_BITS 6
1321
1322 static const struct opc_info {
1323 uint16_t cat;
1324 uint16_t opc;
1325 const char *name;
1326 void (*print)(struct disasm_ctx *ctx, instr_t *instr);
1327 } opcs[1 << (3+NOPC_BITS)] = {
1328 #define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
1329 /* category 0: */
1330 OPC(0, OPC_NOP, nop),
1331 OPC(0, OPC_B, b),
1332 OPC(0, OPC_JUMP, jump),
1333 OPC(0, OPC_CALL, call),
1334 OPC(0, OPC_RET, ret),
1335 OPC(0, OPC_KILL, kill),
1336 OPC(0, OPC_END, end),
1337 OPC(0, OPC_EMIT, emit),
1338 OPC(0, OPC_CUT, cut),
1339 OPC(0, OPC_CHMASK, chmask),
1340 OPC(0, OPC_CHSH, chsh),
1341 OPC(0, OPC_FLOW_REV, flow_rev),
1342 OPC(0, OPC_PREDT, predt),
1343 OPC(0, OPC_PREDF, predf),
1344 OPC(0, OPC_PREDE, prede),
1345 OPC(0, OPC_BKT, bkt),
1346 OPC(0, OPC_STKS, stks),
1347 OPC(0, OPC_STKR, stkr),
1348 OPC(0, OPC_XSET, xset),
1349 OPC(0, OPC_XCLR, xclr),
1350 OPC(0, OPC_GETONE, getone),
1351 OPC(0, OPC_DBG, dbg),
1352 OPC(0, OPC_SHPS, shps),
1353 OPC(0, OPC_SHPE, shpe),
1354
1355 /* category 1: */
1356 OPC(1, OPC_MOV, ),
1357
1358 /* category 2: */
1359 OPC(2, OPC_ADD_F, add.f),
1360 OPC(2, OPC_MIN_F, min.f),
1361 OPC(2, OPC_MAX_F, max.f),
1362 OPC(2, OPC_MUL_F, mul.f),
1363 OPC(2, OPC_SIGN_F, sign.f),
1364 OPC(2, OPC_CMPS_F, cmps.f),
1365 OPC(2, OPC_ABSNEG_F, absneg.f),
1366 OPC(2, OPC_CMPV_F, cmpv.f),
1367 OPC(2, OPC_FLOOR_F, floor.f),
1368 OPC(2, OPC_CEIL_F, ceil.f),
1369 OPC(2, OPC_RNDNE_F, rndne.f),
1370 OPC(2, OPC_RNDAZ_F, rndaz.f),
1371 OPC(2, OPC_TRUNC_F, trunc.f),
1372 OPC(2, OPC_ADD_U, add.u),
1373 OPC(2, OPC_ADD_S, add.s),
1374 OPC(2, OPC_SUB_U, sub.u),
1375 OPC(2, OPC_SUB_S, sub.s),
1376 OPC(2, OPC_CMPS_U, cmps.u),
1377 OPC(2, OPC_CMPS_S, cmps.s),
1378 OPC(2, OPC_MIN_U, min.u),
1379 OPC(2, OPC_MIN_S, min.s),
1380 OPC(2, OPC_MAX_U, max.u),
1381 OPC(2, OPC_MAX_S, max.s),
1382 OPC(2, OPC_ABSNEG_S, absneg.s),
1383 OPC(2, OPC_AND_B, and.b),
1384 OPC(2, OPC_OR_B, or.b),
1385 OPC(2, OPC_NOT_B, not.b),
1386 OPC(2, OPC_XOR_B, xor.b),
1387 OPC(2, OPC_CMPV_U, cmpv.u),
1388 OPC(2, OPC_CMPV_S, cmpv.s),
1389 OPC(2, OPC_MUL_U24, mul.u24),
1390 OPC(2, OPC_MUL_S24, mul.s24),
1391 OPC(2, OPC_MULL_U, mull.u),
1392 OPC(2, OPC_BFREV_B, bfrev.b),
1393 OPC(2, OPC_CLZ_S, clz.s),
1394 OPC(2, OPC_CLZ_B, clz.b),
1395 OPC(2, OPC_SHL_B, shl.b),
1396 OPC(2, OPC_SHR_B, shr.b),
1397 OPC(2, OPC_ASHR_B, ashr.b),
1398 OPC(2, OPC_BARY_F, bary.f),
1399 OPC(2, OPC_MGEN_B, mgen.b),
1400 OPC(2, OPC_GETBIT_B, getbit.b),
1401 OPC(2, OPC_SETRM, setrm),
1402 OPC(2, OPC_CBITS_B, cbits.b),
1403 OPC(2, OPC_SHB, shb),
1404 OPC(2, OPC_MSAD, msad),
1405
1406 /* category 3: */
1407 OPC(3, OPC_MAD_U16, mad.u16),
1408 OPC(3, OPC_MADSH_U16, madsh.u16),
1409 OPC(3, OPC_MAD_S16, mad.s16),
1410 OPC(3, OPC_MADSH_M16, madsh.m16),
1411 OPC(3, OPC_MAD_U24, mad.u24),
1412 OPC(3, OPC_MAD_S24, mad.s24),
1413 OPC(3, OPC_MAD_F16, mad.f16),
1414 OPC(3, OPC_MAD_F32, mad.f32),
1415 OPC(3, OPC_SEL_B16, sel.b16),
1416 OPC(3, OPC_SEL_B32, sel.b32),
1417 OPC(3, OPC_SEL_S16, sel.s16),
1418 OPC(3, OPC_SEL_S32, sel.s32),
1419 OPC(3, OPC_SEL_F16, sel.f16),
1420 OPC(3, OPC_SEL_F32, sel.f32),
1421 OPC(3, OPC_SAD_S16, sad.s16),
1422 OPC(3, OPC_SAD_S32, sad.s32),
1423
1424 /* category 4: */
1425 OPC(4, OPC_RCP, rcp),
1426 OPC(4, OPC_RSQ, rsq),
1427 OPC(4, OPC_LOG2, log2),
1428 OPC(4, OPC_EXP2, exp2),
1429 OPC(4, OPC_SIN, sin),
1430 OPC(4, OPC_COS, cos),
1431 OPC(4, OPC_SQRT, sqrt),
1432 OPC(4, OPC_HRSQ, hrsq),
1433 OPC(4, OPC_HLOG2, hlog2),
1434 OPC(4, OPC_HEXP2, hexp2),
1435
1436 /* category 5: */
1437 OPC(5, OPC_ISAM, isam),
1438 OPC(5, OPC_ISAML, isaml),
1439 OPC(5, OPC_ISAMM, isamm),
1440 OPC(5, OPC_SAM, sam),
1441 OPC(5, OPC_SAMB, samb),
1442 OPC(5, OPC_SAML, saml),
1443 OPC(5, OPC_SAMGQ, samgq),
1444 OPC(5, OPC_GETLOD, getlod),
1445 OPC(5, OPC_CONV, conv),
1446 OPC(5, OPC_CONVM, convm),
1447 OPC(5, OPC_GETSIZE, getsize),
1448 OPC(5, OPC_GETBUF, getbuf),
1449 OPC(5, OPC_GETPOS, getpos),
1450 OPC(5, OPC_GETINFO, getinfo),
1451 OPC(5, OPC_DSX, dsx),
1452 OPC(5, OPC_DSY, dsy),
1453 OPC(5, OPC_GATHER4R, gather4r),
1454 OPC(5, OPC_GATHER4G, gather4g),
1455 OPC(5, OPC_GATHER4B, gather4b),
1456 OPC(5, OPC_GATHER4A, gather4a),
1457 OPC(5, OPC_SAMGP0, samgp0),
1458 OPC(5, OPC_SAMGP1, samgp1),
1459 OPC(5, OPC_SAMGP2, samgp2),
1460 OPC(5, OPC_SAMGP3, samgp3),
1461 OPC(5, OPC_DSXPP_1, dsxpp.1),
1462 OPC(5, OPC_DSYPP_1, dsypp.1),
1463 OPC(5, OPC_RGETPOS, rgetpos),
1464 OPC(5, OPC_RGETINFO, rgetinfo),
1465 /* macros are needed here for ir3_print */
1466 OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
1467 OPC(5, OPC_DSYPP_MACRO, dsypp.macro),
1468
1469
1470 /* category 6: */
1471 OPC(6, OPC_LDG, ldg),
1472 OPC(6, OPC_LDL, ldl),
1473 OPC(6, OPC_LDP, ldp),
1474 OPC(6, OPC_STG, stg),
1475 OPC(6, OPC_STL, stl),
1476 OPC(6, OPC_STP, stp),
1477 OPC(6, OPC_LDIB, ldib),
1478 OPC(6, OPC_G2L, g2l),
1479 OPC(6, OPC_L2G, l2g),
1480 OPC(6, OPC_PREFETCH, prefetch),
1481 OPC(6, OPC_LDLW, ldlw),
1482 OPC(6, OPC_STLW, stlw),
1483 OPC(6, OPC_RESFMT, resfmt),
1484 OPC(6, OPC_RESINFO, resinfo),
1485 OPC(6, OPC_ATOMIC_ADD, atomic.add),
1486 OPC(6, OPC_ATOMIC_SUB, atomic.sub),
1487 OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
1488 OPC(6, OPC_ATOMIC_INC, atomic.inc),
1489 OPC(6, OPC_ATOMIC_DEC, atomic.dec),
1490 OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
1491 OPC(6, OPC_ATOMIC_MIN, atomic.min),
1492 OPC(6, OPC_ATOMIC_MAX, atomic.max),
1493 OPC(6, OPC_ATOMIC_AND, atomic.and),
1494 OPC(6, OPC_ATOMIC_OR, atomic.or),
1495 OPC(6, OPC_ATOMIC_XOR, atomic.xor),
1496 OPC(6, OPC_LDGB, ldgb),
1497 OPC(6, OPC_STGB, stgb),
1498 OPC(6, OPC_STIB, stib),
1499 OPC(6, OPC_LDC, ldc),
1500 OPC(6, OPC_LDLV, ldlv),
1501
1502 OPC(7, OPC_BAR, bar),
1503 OPC(7, OPC_FENCE, fence),
1504
1505 #undef OPC
1506 };
1507
1508 #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
1509
1510 const char *disasm_a3xx_instr_name(opc_t opc)
1511 {
1512 if (opc_cat(opc) == -1) return "??meta??";
1513 return opcs[opc].name;
1514 }
1515
1516 static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
1517 {
1518 const char *name = GETINFO(instr)->name;
1519 uint32_t opc = instr_opc(instr, ctx->gpu_id);
1520
1521 if (name) {
1522 fprintf(ctx->out, "%s", name);
1523 GETINFO(instr)->print(ctx, instr);
1524 } else {
1525 fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc);
1526
1527 switch (instr->opc_cat) {
1528 case 0: print_instr_cat0(ctx, instr); break;
1529 case 1: print_instr_cat1(ctx, instr); break;
1530 case 2: print_instr_cat2(ctx, instr); break;
1531 case 3: print_instr_cat3(ctx, instr); break;
1532 case 4: print_instr_cat4(ctx, instr); break;
1533 case 5: print_instr_cat5(ctx, instr); break;
1534 case 6: print_instr_cat6(ctx, instr); break;
1535 case 7: print_instr_cat7(ctx, instr); break;
1536 }
1537 }
1538 }
1539
1540 static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
1541 {
1542 instr_t *instr = (instr_t *)dwords;
1543 opc_t opc = _OPC(instr->opc_cat, instr_opc(instr, ctx->gpu_id));
1544 unsigned nop = 0;
1545 unsigned cycles = ctx->stats->instructions;
1546
1547 if (debug & PRINT_RAW) {
1548 fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
1549 instr->opc_cat, n, cycles++, dwords[1], dwords[0]);
1550 }
1551
1552 if (opc == OPC_BARY_F)
1553 ctx->stats->last_baryf = ctx->stats->instructions;
1554
1555 ctx->repeat = instr_repeat(instr);
1556 ctx->stats->instructions += 1 + ctx->repeat;
1557 ctx->stats->instlen++;
1558
1559 /* NOTE: order flags are printed is a bit fugly.. but for now I
1560 * try to match the order in llvm-a3xx disassembler for easy
1561 * diff'ing..
1562 */
1563
1564 if (instr->sync) {
1565 fprintf(ctx->out, "(sy)");
1566 ctx->stats->sy++;
1567 }
1568 if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
1569 fprintf(ctx->out, "(ss)");
1570 ctx->stats->ss++;
1571 }
1572 if (instr->jmp_tgt)
1573 fprintf(ctx->out, "(jp)");
1574 if ((instr->opc_cat == 0) && instr->cat0.eq)
1575 fprintf(ctx->out, "(eq)");
1576 if (instr_sat(instr))
1577 fprintf(ctx->out, "(sat)");
1578 if (ctx->repeat)
1579 fprintf(ctx->out, "(rpt%d)", ctx->repeat);
1580 else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r))
1581 nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
1582 else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
1583 nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
1584 if (nop)
1585 fprintf(ctx->out, "(nop%d) ", nop);
1586
1587 if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
1588 fprintf(ctx->out, "(ul)");
1589
1590 ctx->stats->instructions += nop;
1591 ctx->stats->nops += nop;
1592 if (opc == OPC_NOP) {
1593 ctx->stats->nops += 1 + ctx->repeat;
1594 ctx->stats->instrs_per_cat[0] += 1 + ctx->repeat;
1595 } else {
1596 ctx->stats->instrs_per_cat[instr->opc_cat] += 1 + ctx->repeat;
1597 ctx->stats->instrs_per_cat[0] += nop;
1598 }
1599
1600 if (opc == OPC_MOV) {
1601 if (instr->cat1.src_type == instr->cat1.dst_type) {
1602 ctx->stats->mov_count += 1 + ctx->repeat;
1603 } else {
1604 ctx->stats->cov_count += 1 + ctx->repeat;
1605 }
1606 }
1607
1608 print_single_instr(ctx, instr);
1609 fprintf(ctx->out, "\n");
1610
1611 process_reg_dst(ctx);
1612
1613 if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
1614 int i;
1615 for (i = 0; i < nop; i++) {
1616 if (debug & PRINT_VERBOSE) {
1617 fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ",
1618 levels[ctx->level], instr->opc_cat, n, cycles++);
1619 }
1620 fprintf(ctx->out, "nop\n");
1621 }
1622 for (i = 0; i < ctx->repeat; i++) {
1623 ctx->repeatidx = i + 1;
1624 if (debug & PRINT_VERBOSE) {
1625 fprintf(ctx->out, "%s:%d:%04d:%04d[ ] ",
1626 levels[ctx->level], instr->opc_cat, n, cycles++);
1627 }
1628 print_single_instr(ctx, instr);
1629 fprintf(ctx->out, "\n");
1630 }
1631 ctx->repeatidx = 0;
1632 }
1633
1634 return (instr->opc_cat == 0) &&
1635 ((opc == OPC_END) || (opc == OPC_CHSH));
1636 }
1637
1638 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
1639 {
1640 struct shader_stats stats;
1641 return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
1642 }
1643
1644 int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
1645 unsigned gpu_id, struct shader_stats *stats)
1646 {
1647 struct disasm_ctx ctx;
1648 int i;
1649 int nop_count = 0;
1650 bool has_end = false;
1651
1652 ir3_assert((sizedwords % 2) == 0);
1653
1654 memset(&ctx, 0, sizeof(ctx));
1655 ctx.out = out;
1656 ctx.level = level;
1657 ctx.gpu_id = gpu_id;
1658 ctx.stats = stats;
1659 memset(ctx.stats, 0, sizeof(*ctx.stats));
1660
1661 for (i = 0; i < sizedwords; i += 2) {
1662 has_end |= print_instr(&ctx, &dwords[i], i/2);
1663 if (!has_end)
1664 continue;
1665 if (dwords[i] == 0 && dwords[i + 1] == 0)
1666 nop_count++;
1667 else
1668 nop_count = 0;
1669 if (nop_count > 3)
1670 break;
1671 }
1672
1673 if (debug & PRINT_STATS)
1674 print_reg_stats(&ctx);
1675
1676 return 0;
1677 }
1678
1679 void disasm_a3xx_set_debug(enum debug_t d)
1680 {
1681 debug = d;
1682 }