freedreno/ir3: Add new synchronization opcodes
[mesa.git] / src / freedreno / ir3 / disasm-a3xx.c
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include <util/u_debug.h>
32
33 #include "instr-a3xx.h"
34
/* bitmask of debug flags */
enum debug_t {
	PRINT_RAW      = 0x1,   /* dump raw hexdump */
	/* Enables the per-instruction "<level><index>[<hi>_<lo>]" prefix and the
	 * dumps of non-zero dummy/pad fields emitted by the print_instr_cat*()
	 * helpers below.
	 */
	PRINT_VERBOSE  = 0x2,
};

/* Active debug flags.  NOTE(review): nothing in the visible part of this
 * file assigns it, so it stays zero unless set elsewhere -- confirm.
 */
static enum debug_t debug;

/* Redirect bare printf() calls to debug_printf (presumably provided by
 * util/u_debug.h -- confirm).  fprintf() calls are not affected.
 */
#define printf debug_printf
44
/*
 * Line-prefix strings indexed by nesting level (see print_instr(), which
 * prints levels[ctx->level] in verbose mode).  Entries 0..9 hold that many
 * tab characters; entries 10..15 are the literal "x".
 */
static const char *levels[] = {
		"",
		"\t",
		"\t\t",
		"\t\t\t",
		"\t\t\t\t",
		"\t\t\t\t\t",
		"\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t\t\t",
		"x",
		"x",
		"x",
		"x",
		"x",
		"x",
};
63
/* Component letters, indexed by a register's .comp field (or a 0..3 loop
 * index): 0 -> 'x', 1 -> 'y', 2 -> 'z', 3 -> 'w'.
 */
static const char *component = "xyzw";

/* Printable suffix for each TYPE_* value, used to build mnemonics such as
 * "mov.f32f32" and type annotations such as "(f32)".
 */
static const char *type[] = {
		[TYPE_F16] = "f16",
		[TYPE_F32] = "f32",
		[TYPE_U16] = "u16",
		[TYPE_U32] = "u32",
		[TYPE_S16] = "s16",
		[TYPE_S32] = "s32",
		[TYPE_U8]  = "u8",
		[TYPE_S8]  = "s8",
};
76
/* State shared by all print helpers while disassembling one buffer. */
struct disasm_ctx {
	FILE *out;          /* stream that all disassembly text is written to */
	int level;          /* index into levels[] for the verbose line prefix */
	unsigned gpu_id;    /* passed to instr_opc() and is_cat6_legacy() */

	/* current instruction repeat flag: */
	unsigned repeat;    /* set from instr_repeat() at the start of print_instr() */
};
85
86 static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r,
87 bool c, bool im, bool neg, bool abs, bool addr_rel)
88 {
89 const char type = c ? 'c' : 'r';
90
91 // XXX I prefer - and || for neg/abs, but preserving format used
92 // by libllvm-a3xx for easy diffing..
93
94 if (abs && neg)
95 fprintf(ctx->out, "(absneg)");
96 else if (neg)
97 fprintf(ctx->out, "(neg)");
98 else if (abs)
99 fprintf(ctx->out, "(abs)");
100
101 if (r)
102 fprintf(ctx->out, "(r)");
103
104 if (im) {
105 fprintf(ctx->out, "%d", reg.iim_val);
106 } else if (addr_rel) {
107 /* I would just use %+d but trying to make it diff'able with
108 * libllvm-a3xx...
109 */
110 if (reg.iim_val < 0)
111 fprintf(ctx->out, "%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
112 else if (reg.iim_val > 0)
113 fprintf(ctx->out, "%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
114 else
115 fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
116 } else if ((reg.num == REG_A0) && !c) {
117 fprintf(ctx->out, "a0.%c", component[reg.comp]);
118 } else if ((reg.num == REG_P0) && !c) {
119 fprintf(ctx->out, "p0.%c", component[reg.comp]);
120 } else {
121 fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
122 }
123 }
124
125
126 static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
127 {
128 print_reg(ctx, reg, full, false, false, false, false, false, addr_rel);
129 }
130
131 static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool r,
132 bool c, bool im, bool neg, bool abs, bool addr_rel)
133 {
134 print_reg(ctx, reg, full, r, c, im, neg, abs, addr_rel);
135 }
136
/* TODO switch to using reginfo struct everywhere, since more readable
 * than passing a bunch of bools to print_reg_src
 */

/* Bundles a register with the flags print_reg_src() takes; the fields map
 * one-to-one onto that function's parameters (see print_src()).
 */
struct reginfo {
	reg_t reg;       /* the register (or immediate) encoding itself */
	bool full;       /* false: printed with an "h" prefix */
	bool r;          /* print the "(r)" flag */
	bool c;          /* use register file letter 'c' instead of 'r' */
	bool im;         /* print reg.iim_val as a decimal immediate */
	bool neg;        /* print "(neg)" (or "(absneg)" together with abs) */
	bool abs;        /* print "(abs)" (or "(absneg)" together with neg) */
	bool addr_rel;   /* print in the a0.x-relative "<a0.x + N>" form */
};
151
152 static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
153 {
154 print_reg_src(ctx, info->reg, info->full, info->r, info->c, info->im,
155 info->neg, info->abs, info->addr_rel);
156 }
157
158 //static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
159 //{
160 // print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
161 //}
162
163 static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
164 {
165 instr_cat0_t *cat0 = &instr->cat0;
166
167 switch (cat0->opc) {
168 case OPC_KILL:
169 case OPC_CONDEND:
170 fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "",
171 component[cat0->comp]);
172 break;
173 case OPC_BR:
174 fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" : "",
175 component[cat0->comp], cat0->a3xx.immed);
176 break;
177 case OPC_JUMP:
178 case OPC_CALL:
179 fprintf(ctx->out, " #%d", cat0->a3xx.immed);
180 break;
181 }
182
183 if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4))
184 fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4);
185 }
186
187 static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
188 {
189 instr_cat1_t *cat1 = &instr->cat1;
190
191 if (cat1->ul)
192 fprintf(ctx->out, "(ul)");
193
194 if (cat1->src_type == cat1->dst_type) {
195 if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
196 /* special case (nmemonic?): */
197 fprintf(ctx->out, "mova");
198 } else {
199 fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
200 }
201 } else {
202 fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
203 }
204
205 fprintf(ctx->out, " ");
206
207 if (cat1->even)
208 fprintf(ctx->out, "(even)");
209
210 if (cat1->pos_inf)
211 fprintf(ctx->out, "(pos_infinity)");
212
213 print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
214 cat1->dst_rel);
215
216 fprintf(ctx->out, ", ");
217
218 /* ugg, have to special case this.. vs print_reg().. */
219 if (cat1->src_im) {
220 if (type_float(cat1->src_type))
221 fprintf(ctx->out, "(%f)", cat1->fim_val);
222 else if (type_uint(cat1->src_type))
223 fprintf(ctx->out, "0x%08x", cat1->uim_val);
224 else
225 fprintf(ctx->out, "%d", cat1->iim_val);
226 } else if (cat1->src_rel && !cat1->src_c) {
227 /* I would just use %+d but trying to make it diff'able with
228 * libllvm-a3xx...
229 */
230 char type = cat1->src_rel_c ? 'c' : 'r';
231 if (cat1->off < 0)
232 fprintf(ctx->out, "%c<a0.x - %d>", type, -cat1->off);
233 else if (cat1->off > 0)
234 fprintf(ctx->out, "%c<a0.x + %d>", type, cat1->off);
235 else
236 fprintf(ctx->out, "%c<a0.x>", type);
237 } else {
238 print_reg_src(ctx, (reg_t)(cat1->src), type_size(cat1->src_type) == 32,
239 cat1->src_r, cat1->src_c, cat1->src_im, false, false, false);
240 }
241
242 if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
243 fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
244 }
245
246 static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
247 {
248 instr_cat2_t *cat2 = &instr->cat2;
249 static const char *cond[] = {
250 "lt",
251 "le",
252 "gt",
253 "ge",
254 "eq",
255 "ne",
256 "?6?",
257 };
258
259 switch (_OPC(2, cat2->opc)) {
260 case OPC_CMPS_F:
261 case OPC_CMPS_U:
262 case OPC_CMPS_S:
263 case OPC_CMPV_F:
264 case OPC_CMPV_U:
265 case OPC_CMPV_S:
266 fprintf(ctx->out, ".%s", cond[cat2->cond]);
267 break;
268 }
269
270 fprintf(ctx->out, " ");
271 if (cat2->ei)
272 fprintf(ctx->out, "(ei)");
273 print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
274 fprintf(ctx->out, ", ");
275
276 unsigned src1_r = cat2->repeat ? cat2->src1_r : 0;
277 if (cat2->c1.src1_c) {
278 print_reg_src(ctx, (reg_t)(cat2->c1.src1), cat2->full, src1_r,
279 cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg,
280 cat2->src1_abs, false);
281 } else if (cat2->rel1.src1_rel) {
282 print_reg_src(ctx, (reg_t)(cat2->rel1.src1), cat2->full, src1_r,
283 cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg,
284 cat2->src1_abs, cat2->rel1.src1_rel);
285 } else {
286 print_reg_src(ctx, (reg_t)(cat2->src1), cat2->full, src1_r,
287 false, cat2->src1_im, cat2->src1_neg,
288 cat2->src1_abs, false);
289 }
290
291 unsigned src2_r = cat2->repeat ? cat2->src2_r : 0;
292 switch (_OPC(2, cat2->opc)) {
293 case OPC_ABSNEG_F:
294 case OPC_ABSNEG_S:
295 case OPC_CLZ_B:
296 case OPC_CLZ_S:
297 case OPC_SIGN_F:
298 case OPC_FLOOR_F:
299 case OPC_CEIL_F:
300 case OPC_RNDNE_F:
301 case OPC_RNDAZ_F:
302 case OPC_TRUNC_F:
303 case OPC_NOT_B:
304 case OPC_BFREV_B:
305 case OPC_SETRM:
306 case OPC_CBITS_B:
307 /* these only have one src reg */
308 break;
309 default:
310 fprintf(ctx->out, ", ");
311 if (cat2->c2.src2_c) {
312 print_reg_src(ctx, (reg_t)(cat2->c2.src2), cat2->full, src2_r,
313 cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg,
314 cat2->src2_abs, false);
315 } else if (cat2->rel2.src2_rel) {
316 print_reg_src(ctx, (reg_t)(cat2->rel2.src2), cat2->full, src2_r,
317 cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg,
318 cat2->src2_abs, cat2->rel2.src2_rel);
319 } else {
320 print_reg_src(ctx, (reg_t)(cat2->src2), cat2->full, src2_r,
321 false, cat2->src2_im, cat2->src2_neg,
322 cat2->src2_abs, false);
323 }
324 break;
325 }
326 }
327
328 static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
329 {
330 instr_cat3_t *cat3 = &instr->cat3;
331 bool full = instr_cat3_full(cat3);
332
333 fprintf(ctx->out, " ");
334 print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
335 fprintf(ctx->out, ", ");
336 unsigned src1_r = cat3->repeat ? cat3->src1_r : 0;
337 if (cat3->c1.src1_c) {
338 print_reg_src(ctx, (reg_t)(cat3->c1.src1), full,
339 src1_r, cat3->c1.src1_c, false, cat3->src1_neg,
340 false, false);
341 } else if (cat3->rel1.src1_rel) {
342 print_reg_src(ctx, (reg_t)(cat3->rel1.src1), full,
343 src1_r, cat3->rel1.src1_c, false, cat3->src1_neg,
344 false, cat3->rel1.src1_rel);
345 } else {
346 print_reg_src(ctx, (reg_t)(cat3->src1), full,
347 src1_r, false, false, cat3->src1_neg,
348 false, false);
349 }
350 fprintf(ctx->out, ", ");
351 unsigned src2_r = cat3->repeat ? cat3->src2_r : 0;
352 print_reg_src(ctx, (reg_t)cat3->src2, full,
353 src2_r, cat3->src2_c, false, cat3->src2_neg,
354 false, false);
355 fprintf(ctx->out, ", ");
356 if (cat3->c2.src3_c) {
357 print_reg_src(ctx, (reg_t)(cat3->c2.src3), full,
358 cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg,
359 false, false);
360 } else if (cat3->rel2.src3_rel) {
361 print_reg_src(ctx, (reg_t)(cat3->rel2.src3), full,
362 cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg,
363 false, cat3->rel2.src3_rel);
364 } else {
365 print_reg_src(ctx, (reg_t)(cat3->src3), full,
366 cat3->src3_r, false, false, cat3->src3_neg,
367 false, false);
368 }
369 }
370
371 static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
372 {
373 instr_cat4_t *cat4 = &instr->cat4;
374
375 fprintf(ctx->out, " ");
376 print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
377 fprintf(ctx->out, ", ");
378
379 if (cat4->c.src_c) {
380 print_reg_src(ctx, (reg_t)(cat4->c.src), cat4->full,
381 cat4->src_r, cat4->c.src_c, cat4->src_im,
382 cat4->src_neg, cat4->src_abs, false);
383 } else if (cat4->rel.src_rel) {
384 print_reg_src(ctx, (reg_t)(cat4->rel.src), cat4->full,
385 cat4->src_r, cat4->rel.src_c, cat4->src_im,
386 cat4->src_neg, cat4->src_abs, cat4->rel.src_rel);
387 } else {
388 print_reg_src(ctx, (reg_t)(cat4->src), cat4->full,
389 cat4->src_r, false, cat4->src_im,
390 cat4->src_neg, cat4->src_abs, false);
391 }
392
393 if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
394 fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
395 }
396
397 static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
398 {
399 static const struct {
400 bool src1, src2, samp, tex;
401 } info[0x1f] = {
402 [opc_op(OPC_ISAM)] = { true, false, true, true, },
403 [opc_op(OPC_ISAML)] = { true, true, true, true, },
404 [opc_op(OPC_ISAMM)] = { true, false, true, true, },
405 [opc_op(OPC_SAM)] = { true, false, true, true, },
406 [opc_op(OPC_SAMB)] = { true, true, true, true, },
407 [opc_op(OPC_SAML)] = { true, true, true, true, },
408 [opc_op(OPC_SAMGQ)] = { true, false, true, true, },
409 [opc_op(OPC_GETLOD)] = { true, false, true, true, },
410 [opc_op(OPC_CONV)] = { true, true, true, true, },
411 [opc_op(OPC_CONVM)] = { true, true, true, true, },
412 [opc_op(OPC_GETSIZE)] = { true, false, false, true, },
413 [opc_op(OPC_GETBUF)] = { false, false, false, true, },
414 [opc_op(OPC_GETPOS)] = { true, false, false, true, },
415 [opc_op(OPC_GETINFO)] = { false, false, false, true, },
416 [opc_op(OPC_DSX)] = { true, false, false, false, },
417 [opc_op(OPC_DSY)] = { true, false, false, false, },
418 [opc_op(OPC_GATHER4R)] = { true, false, true, true, },
419 [opc_op(OPC_GATHER4G)] = { true, false, true, true, },
420 [opc_op(OPC_GATHER4B)] = { true, false, true, true, },
421 [opc_op(OPC_GATHER4A)] = { true, false, true, true, },
422 [opc_op(OPC_SAMGP0)] = { true, false, true, true, },
423 [opc_op(OPC_SAMGP1)] = { true, false, true, true, },
424 [opc_op(OPC_SAMGP2)] = { true, false, true, true, },
425 [opc_op(OPC_SAMGP3)] = { true, false, true, true, },
426 [opc_op(OPC_DSXPP_1)] = { true, false, false, false, },
427 [opc_op(OPC_DSYPP_1)] = { true, false, false, false, },
428 [opc_op(OPC_RGETPOS)] = { true, false, false, false, },
429 [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
430 };
431 instr_cat5_t *cat5 = &instr->cat5;
432 int i;
433
434 if (cat5->is_3d) fprintf(ctx->out, ".3d");
435 if (cat5->is_a) fprintf(ctx->out, ".a");
436 if (cat5->is_o) fprintf(ctx->out, ".o");
437 if (cat5->is_p) fprintf(ctx->out, ".p");
438 if (cat5->is_s) fprintf(ctx->out, ".s");
439 if (cat5->is_s2en) fprintf(ctx->out, ".s2en");
440
441 fprintf(ctx->out, " ");
442
443 switch (_OPC(5, cat5->opc)) {
444 case OPC_DSXPP_1:
445 case OPC_DSYPP_1:
446 break;
447 default:
448 fprintf(ctx->out, "(%s)", type[cat5->type]);
449 break;
450 }
451
452 fprintf(ctx->out, "(");
453 for (i = 0; i < 4; i++)
454 if (cat5->wrmask & (1 << i))
455 fprintf(ctx->out, "%c", "xyzw"[i]);
456 fprintf(ctx->out, ")");
457
458 print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
459
460 if (info[cat5->opc].src1) {
461 fprintf(ctx->out, ", ");
462 print_reg_src(ctx, (reg_t)(cat5->src1), cat5->full, false, false, false,
463 false, false, false);
464 }
465
466 if (cat5->is_s2en) {
467 if (cat5->is_o || info[cat5->opc].src2) {
468 fprintf(ctx->out, ", ");
469 print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full,
470 false, false, false, false, false, false);
471 }
472 fprintf(ctx->out, ", ");
473 print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false,
474 false, false, false);
475 } else {
476 if (cat5->is_o || info[cat5->opc].src2) {
477 fprintf(ctx->out, ", ");
478 print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full,
479 false, false, false, false, false, false);
480 }
481 if (info[cat5->opc].samp)
482 fprintf(ctx->out, ", s#%d", cat5->norm.samp);
483 if (info[cat5->opc].tex)
484 fprintf(ctx->out, ", t#%d", cat5->norm.tex);
485 }
486
487 if (debug & PRINT_VERBOSE) {
488 if (cat5->is_s2en) {
489 if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2))
490 fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
491 } else {
492 if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2))
493 fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
494 }
495 }
496 }
497
498 static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
499 {
500 instr_cat6_t *cat6 = &instr->cat6;
501 char sd = 0, ss = 0; /* dst/src address space */
502 bool nodst = false;
503 struct reginfo dst, src1, src2;
504 int src1off = 0, dstoff = 0;
505
506 memset(&dst, 0, sizeof(dst));
507 memset(&src1, 0, sizeof(src1));
508 memset(&src2, 0, sizeof(src2));
509
510 switch (_OPC(6, cat6->opc)) {
511 case OPC_RESINFO:
512 case OPC_RESFMT:
513 dst.full = type_size(cat6->type) == 32;
514 src1.full = type_size(cat6->type) == 32;
515 src2.full = type_size(cat6->type) == 32;
516 break;
517 case OPC_L2G:
518 case OPC_G2L:
519 dst.full = true;
520 src1.full = true;
521 src2.full = true;
522 break;
523 case OPC_STG:
524 case OPC_STL:
525 case OPC_STP:
526 case OPC_STLW:
527 case OPC_STIB:
528 dst.full = true;
529 src1.full = type_size(cat6->type) == 32;
530 src2.full = type_size(cat6->type) == 32;
531 break;
532 default:
533 dst.full = type_size(cat6->type) == 32;
534 src1.full = true;
535 src2.full = true;
536 break;
537 }
538
539 switch (_OPC(6, cat6->opc)) {
540 case OPC_PREFETCH:
541 break;
542 case OPC_RESINFO:
543 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
544 break;
545 case OPC_LDGB:
546 fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
547 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
548 fprintf(ctx->out, ".%s", type[cat6->type]);
549 fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
550 break;
551 case OPC_STGB:
552 case OPC_STIB:
553 fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
554 fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
555 fprintf(ctx->out, ".%s", type[cat6->type]);
556 fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
557 break;
558 case OPC_ATOMIC_ADD:
559 case OPC_ATOMIC_SUB:
560 case OPC_ATOMIC_XCHG:
561 case OPC_ATOMIC_INC:
562 case OPC_ATOMIC_DEC:
563 case OPC_ATOMIC_CMPXCHG:
564 case OPC_ATOMIC_MIN:
565 case OPC_ATOMIC_MAX:
566 case OPC_ATOMIC_AND:
567 case OPC_ATOMIC_OR:
568 case OPC_ATOMIC_XOR:
569 ss = cat6->g ? 'g' : 'l';
570 fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
571 fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
572 fprintf(ctx->out, ".%s", type[cat6->type]);
573 fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
574 fprintf(ctx->out, ".%c", ss);
575 break;
576 default:
577 dst.im = cat6->g && !cat6->dst_off;
578 fprintf(ctx->out, ".%s", type[cat6->type]);
579 break;
580 }
581 fprintf(ctx->out, " ");
582
583 switch (_OPC(6, cat6->opc)) {
584 case OPC_STG:
585 sd = 'g';
586 break;
587 case OPC_STP:
588 sd = 'p';
589 break;
590 case OPC_STL:
591 case OPC_STLW:
592 sd = 'l';
593 break;
594
595 case OPC_LDG:
596 case OPC_LDC:
597 ss = 'g';
598 break;
599 case OPC_LDP:
600 ss = 'p';
601 break;
602 case OPC_LDL:
603 case OPC_LDLW:
604 case OPC_LDLV:
605 ss = 'l';
606 break;
607
608 case OPC_L2G:
609 ss = 'l';
610 sd = 'g';
611 break;
612
613 case OPC_G2L:
614 ss = 'g';
615 sd = 'l';
616 break;
617
618 case OPC_PREFETCH:
619 ss = 'g';
620 nodst = true;
621 break;
622 }
623
624 if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
625 struct reginfo src3;
626
627 memset(&src3, 0, sizeof(src3));
628
629 src1.reg = (reg_t)(cat6->stgb.src1);
630 src2.reg = (reg_t)(cat6->stgb.src2);
631 src2.im = cat6->stgb.src2_im;
632 src3.reg = (reg_t)(cat6->stgb.src3);
633 src3.im = cat6->stgb.src3_im;
634 src3.full = true;
635
636 fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
637 print_src(ctx, &src1);
638 fprintf(ctx->out, ", ");
639 print_src(ctx, &src2);
640 fprintf(ctx->out, ", ");
641 print_src(ctx, &src3);
642
643 if (debug & PRINT_VERBOSE)
644 fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
645
646 return;
647 }
648
649 if (is_atomic(_OPC(6, cat6->opc))) {
650
651 src1.reg = (reg_t)(cat6->ldgb.src1);
652 src1.im = cat6->ldgb.src1_im;
653 src2.reg = (reg_t)(cat6->ldgb.src2);
654 src2.im = cat6->ldgb.src2_im;
655 dst.reg = (reg_t)(cat6->ldgb.dst);
656
657 print_src(ctx, &dst);
658 fprintf(ctx->out, ", ");
659 if (ss == 'g') {
660 struct reginfo src3;
661 memset(&src3, 0, sizeof(src3));
662
663 src3.reg = (reg_t)(cat6->ldgb.src3);
664 src3.full = true;
665
666 /* For images, the ".typed" variant is used and src2 is
667 * the ivecN coordinates, ie ivec2 for 2d.
668 *
669 * For SSBOs, the ".untyped" variant is used and src2 is
670 * a simple dword offset.. src3 appears to be
671 * uvec2(offset * 4, 0). Not sure the point of that.
672 */
673
674 fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
675 print_src(ctx, &src1); /* value */
676 fprintf(ctx->out, ", ");
677 print_src(ctx, &src2); /* offset/coords */
678 fprintf(ctx->out, ", ");
679 print_src(ctx, &src3); /* 64b byte offset.. */
680
681 if (debug & PRINT_VERBOSE) {
682 fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0,
683 cat6->ldgb.pad3, cat6->ldgb.mustbe0);
684 }
685 } else { /* ss == 'l' */
686 fprintf(ctx->out, "l[");
687 print_src(ctx, &src1); /* simple byte offset */
688 fprintf(ctx->out, "], ");
689 print_src(ctx, &src2); /* value */
690
691 if (debug & PRINT_VERBOSE) {
692 fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)",
693 cat6->ldgb.src3, cat6->ldgb.pad0,
694 cat6->ldgb.pad3, cat6->ldgb.mustbe0);
695 }
696 }
697
698 return;
699 } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
700 dst.reg = (reg_t)(cat6->ldgb.dst);
701
702 print_src(ctx, &dst);
703 fprintf(ctx->out, ", ");
704 fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo);
705
706 return;
707 } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
708
709 src1.reg = (reg_t)(cat6->ldgb.src1);
710 src1.im = cat6->ldgb.src1_im;
711 src2.reg = (reg_t)(cat6->ldgb.src2);
712 src2.im = cat6->ldgb.src2_im;
713 dst.reg = (reg_t)(cat6->ldgb.dst);
714
715 print_src(ctx, &dst);
716 fprintf(ctx->out, ", ");
717 fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
718 print_src(ctx, &src1);
719 fprintf(ctx->out, ", ");
720 print_src(ctx, &src2);
721
722 if (debug & PRINT_VERBOSE)
723 fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0);
724
725 return;
726 } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
727 struct reginfo src3;
728
729 memset(&src3, 0, sizeof(src3));
730 src1.reg = (reg_t)(cat6->a.src1);
731 src2.reg = (reg_t)(cat6->a.src2);
732 src2.im = cat6->a.src2_im;
733 src3.reg = (reg_t)(cat6->a.off);
734 src3.full = true;
735 dst.reg = (reg_t)(cat6->d.dst);
736
737 print_src(ctx, &dst);
738 fprintf(ctx->out, ", g[");
739 print_src(ctx, &src1);
740 fprintf(ctx->out, "+");
741 print_src(ctx, &src3);
742 fprintf(ctx->out, "], ");
743 print_src(ctx, &src2);
744
745 return;
746 }
747 if (cat6->dst_off) {
748 dst.reg = (reg_t)(cat6->c.dst);
749 dstoff = cat6->c.off;
750 } else {
751 dst.reg = (reg_t)(cat6->d.dst);
752 }
753
754 if (cat6->src_off) {
755 src1.reg = (reg_t)(cat6->a.src1);
756 src1.im = cat6->a.src1_im;
757 src2.reg = (reg_t)(cat6->a.src2);
758 src2.im = cat6->a.src2_im;
759 src1off = cat6->a.off;
760 } else {
761 src1.reg = (reg_t)(cat6->b.src1);
762 src1.im = cat6->b.src1_im;
763 src2.reg = (reg_t)(cat6->b.src2);
764 src2.im = cat6->b.src2_im;
765 }
766
767 if (!nodst) {
768 if (sd)
769 fprintf(ctx->out, "%c[", sd);
770 /* note: dst might actually be a src (ie. address to store to) */
771 print_src(ctx, &dst);
772 if (cat6->dst_off && cat6->g) {
773 struct reginfo dstoff_reg = {};
774 dstoff_reg.reg = (reg_t) cat6->c.off;
775 dstoff_reg.full = true;
776 fprintf(ctx->out, "+");
777 print_src(ctx, &dstoff_reg);
778 } else if (dstoff) {
779 fprintf(ctx->out, "%+d", dstoff);
780 }
781 if (sd)
782 fprintf(ctx->out, "]");
783 fprintf(ctx->out, ", ");
784 }
785
786 if (ss)
787 fprintf(ctx->out, "%c[", ss);
788
789 /* can have a larger than normal immed, so hack: */
790 if (src1.im) {
791 fprintf(ctx->out, "%u", src1.reg.dummy13);
792 } else {
793 print_src(ctx, &src1);
794 }
795
796 if (cat6->src_off && cat6->g)
797 print_src(ctx, &src2);
798 else if (src1off)
799 fprintf(ctx->out, "%+d", src1off);
800 if (ss)
801 fprintf(ctx->out, "]");
802
803 switch (_OPC(6, cat6->opc)) {
804 case OPC_RESINFO:
805 case OPC_RESFMT:
806 break;
807 default:
808 fprintf(ctx->out, ", ");
809 print_src(ctx, &src2);
810 break;
811 }
812 }
813
814 static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
815 {
816 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
817 struct reginfo src1, src2;
818 bool has_dest = _OPC(6, cat6->opc) == OPC_LDIB;
819 char ss = 0;
820
821 memset(&src1, 0, sizeof(src1));
822 memset(&src2, 0, sizeof(src2));
823
824 fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
825 fprintf(ctx->out, ".%dd", cat6->d + 1);
826 fprintf(ctx->out, ".%s", type[cat6->type]);
827 fprintf(ctx->out, ".%u ", cat6->type_size + 1);
828
829 if (has_dest) {
830 src2.reg = (reg_t)(cat6->src2);
831 src2.full = true; // XXX
832 print_src(ctx, &src2);
833
834 fprintf(ctx->out, ", ");
835 }
836
837 /* NOTE: blob seems to use old encoding for ldl/stl (local memory) */
838 ss = 'g';
839
840 fprintf(ctx->out, "%c[%u", ss, cat6->ssbo);
841 fprintf(ctx->out, "] + ");
842 src1.reg = (reg_t)(cat6->src1);
843 src1.full = true; // XXX
844 print_src(ctx, &src1);
845
846 if (!has_dest) {
847 fprintf(ctx->out, ", ");
848
849 src2.reg = (reg_t)(cat6->src2);
850 src2.full = true; // XXX
851 print_src(ctx, &src2);
852 }
853
854 if (debug & PRINT_VERBOSE) {
855 fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1,
856 cat6->pad2, cat6->pad3, cat6->pad4);
857 }
858 }
859
860 static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
861 {
862 if (!is_cat6_legacy(instr, ctx->gpu_id)) {
863 print_instr_cat6_a6xx(ctx, instr);
864 if (debug & PRINT_VERBOSE)
865 fprintf(ctx->out, " NEW");
866 } else {
867 print_instr_cat6_a3xx(ctx, instr);
868 if (debug & PRINT_VERBOSE)
869 fprintf(ctx->out, " LEGACY");
870 }
871 }
872 static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
873 {
874 instr_cat7_t *cat7 = &instr->cat7;
875
876 if (cat7->g)
877 fprintf(ctx->out, ".g");
878 if (cat7->l)
879 fprintf(ctx->out, ".l");
880
881 if (_OPC(7, cat7->opc) == OPC_FENCE) {
882 if (cat7->r)
883 fprintf(ctx->out, ".r");
884 if (cat7->w)
885 fprintf(ctx->out, ".w");
886 }
887 }
888
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/*
 * Opcode table: mnemonic and operand printer for every known opcode.
 *
 * The table has 1 << (3 + NOPC_BITS) slots and GETINFO() indexes it with
 * (category << NOPC_BITS) | opcode, while the OPC() helper below places
 * each entry at [(opc)].  Presumably the OPC_* constants already encode
 * the category in their upper bits (via _OPC()) so that the two agree --
 * confirm in instr-a3xx.h.
 *
 * Slots that are not listed stay zero-initialized; print_instr() treats a
 * NULL name as an unknown instruction.
 */
static const struct opc_info {
	uint16_t cat;       /* instruction category, also selects the printer */
	uint16_t opc;       /* the OPC_* value the entry was registered under */
	const char *name;   /* mnemonic printed before the operands */
	void (*print)(struct disasm_ctx *ctx, instr_t *instr);
} opcs[1 << (3+NOPC_BITS)] = {
#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
	/* category 0: */
	OPC(0, OPC_NOP,          nop),
	OPC(0, OPC_BR,           br),
	OPC(0, OPC_JUMP,         jump),
	OPC(0, OPC_CALL,         call),
	OPC(0, OPC_RET,          ret),
	OPC(0, OPC_KILL,         kill),
	OPC(0, OPC_END,          end),
	OPC(0, OPC_EMIT,         emit),
	OPC(0, OPC_CUT,          cut),
	OPC(0, OPC_CHMASK,       chmask),
	OPC(0, OPC_CHSH,         chsh),
	OPC(0, OPC_FLOW_REV,     flow_rev),
	OPC(0, OPC_CONDEND,      condend),
	OPC(0, OPC_ENDPATCH,     endpatch),

	/* category 1: */
	/* name is left empty (stringizes to ""): print_instr_cat1() writes the
	 * mov/cov/mova mnemonic itself
	 */
	OPC(1, OPC_MOV,          ),

	/* category 2: */
	OPC(2, OPC_ADD_F,        add.f),
	OPC(2, OPC_MIN_F,        min.f),
	OPC(2, OPC_MAX_F,        max.f),
	OPC(2, OPC_MUL_F,        mul.f),
	OPC(2, OPC_SIGN_F,       sign.f),
	OPC(2, OPC_CMPS_F,       cmps.f),
	OPC(2, OPC_ABSNEG_F,     absneg.f),
	OPC(2, OPC_CMPV_F,       cmpv.f),
	OPC(2, OPC_FLOOR_F,      floor.f),
	OPC(2, OPC_CEIL_F,       ceil.f),
	OPC(2, OPC_RNDNE_F,      rndne.f),
	OPC(2, OPC_RNDAZ_F,      rndaz.f),
	OPC(2, OPC_TRUNC_F,      trunc.f),
	OPC(2, OPC_ADD_U,        add.u),
	OPC(2, OPC_ADD_S,        add.s),
	OPC(2, OPC_SUB_U,        sub.u),
	OPC(2, OPC_SUB_S,        sub.s),
	OPC(2, OPC_CMPS_U,       cmps.u),
	OPC(2, OPC_CMPS_S,       cmps.s),
	OPC(2, OPC_MIN_U,        min.u),
	OPC(2, OPC_MIN_S,        min.s),
	OPC(2, OPC_MAX_U,        max.u),
	OPC(2, OPC_MAX_S,        max.s),
	OPC(2, OPC_ABSNEG_S,     absneg.s),
	OPC(2, OPC_AND_B,        and.b),
	OPC(2, OPC_OR_B,         or.b),
	OPC(2, OPC_NOT_B,        not.b),
	OPC(2, OPC_XOR_B,        xor.b),
	OPC(2, OPC_CMPV_U,       cmpv.u),
	OPC(2, OPC_CMPV_S,       cmpv.s),
	OPC(2, OPC_MUL_U24,      mul.u24),
	OPC(2, OPC_MUL_S24,      mul.s24),
	OPC(2, OPC_MULL_U,       mull.u),
	OPC(2, OPC_BFREV_B,      bfrev.b),
	OPC(2, OPC_CLZ_S,        clz.s),
	OPC(2, OPC_CLZ_B,        clz.b),
	OPC(2, OPC_SHL_B,        shl.b),
	OPC(2, OPC_SHR_B,        shr.b),
	OPC(2, OPC_ASHR_B,       ashr.b),
	OPC(2, OPC_BARY_F,       bary.f),
	OPC(2, OPC_MGEN_B,       mgen.b),
	OPC(2, OPC_GETBIT_B,     getbit.b),
	OPC(2, OPC_SETRM,        setrm),
	OPC(2, OPC_CBITS_B,      cbits.b),
	OPC(2, OPC_SHB,          shb),
	OPC(2, OPC_MSAD,         msad),

	/* category 3: */
	OPC(3, OPC_MAD_U16,      mad.u16),
	OPC(3, OPC_MADSH_U16,    madsh.u16),
	OPC(3, OPC_MAD_S16,      mad.s16),
	OPC(3, OPC_MADSH_M16,    madsh.m16),
	OPC(3, OPC_MAD_U24,      mad.u24),
	OPC(3, OPC_MAD_S24,      mad.s24),
	OPC(3, OPC_MAD_F16,      mad.f16),
	OPC(3, OPC_MAD_F32,      mad.f32),
	OPC(3, OPC_SEL_B16,      sel.b16),
	OPC(3, OPC_SEL_B32,      sel.b32),
	OPC(3, OPC_SEL_S16,      sel.s16),
	OPC(3, OPC_SEL_S32,      sel.s32),
	OPC(3, OPC_SEL_F16,      sel.f16),
	OPC(3, OPC_SEL_F32,      sel.f32),
	OPC(3, OPC_SAD_S16,      sad.s16),
	OPC(3, OPC_SAD_S32,      sad.s32),

	/* category 4: */
	OPC(4, OPC_RCP,          rcp),
	OPC(4, OPC_RSQ,          rsq),
	OPC(4, OPC_LOG2,         log2),
	OPC(4, OPC_EXP2,         exp2),
	OPC(4, OPC_SIN,          sin),
	OPC(4, OPC_COS,          cos),
	OPC(4, OPC_SQRT,         sqrt),

	/* category 5: */
	OPC(5, OPC_ISAM,         isam),
	OPC(5, OPC_ISAML,        isaml),
	OPC(5, OPC_ISAMM,        isamm),
	OPC(5, OPC_SAM,          sam),
	OPC(5, OPC_SAMB,         samb),
	OPC(5, OPC_SAML,         saml),
	OPC(5, OPC_SAMGQ,        samgq),
	OPC(5, OPC_GETLOD,       getlod),
	OPC(5, OPC_CONV,         conv),
	OPC(5, OPC_CONVM,        convm),
	OPC(5, OPC_GETSIZE,      getsize),
	OPC(5, OPC_GETBUF,       getbuf),
	OPC(5, OPC_GETPOS,       getpos),
	OPC(5, OPC_GETINFO,      getinfo),
	OPC(5, OPC_DSX,          dsx),
	OPC(5, OPC_DSY,          dsy),
	OPC(5, OPC_GATHER4R,     gather4r),
	OPC(5, OPC_GATHER4G,     gather4g),
	OPC(5, OPC_GATHER4B,     gather4b),
	OPC(5, OPC_GATHER4A,     gather4a),
	OPC(5, OPC_SAMGP0,       samgp0),
	OPC(5, OPC_SAMGP1,       samgp1),
	OPC(5, OPC_SAMGP2,       samgp2),
	OPC(5, OPC_SAMGP3,       samgp3),
	OPC(5, OPC_DSXPP_1,      dsxpp.1),
	OPC(5, OPC_DSYPP_1,      dsypp.1),
	OPC(5, OPC_RGETPOS,      rgetpos),
	OPC(5, OPC_RGETINFO,     rgetinfo),


	/* category 6: */
	OPC(6, OPC_LDG,          ldg),
	OPC(6, OPC_LDL,          ldl),
	OPC(6, OPC_LDP,          ldp),
	OPC(6, OPC_STG,          stg),
	OPC(6, OPC_STL,          stl),
	OPC(6, OPC_STP,          stp),
	OPC(6, OPC_LDIB,         ldib),
	OPC(6, OPC_G2L,          g2l),
	OPC(6, OPC_L2G,          l2g),
	OPC(6, OPC_PREFETCH,     prefetch),
	OPC(6, OPC_LDLW,         ldlw),
	OPC(6, OPC_STLW,         stlw),
	OPC(6, OPC_RESFMT,       resfmt),
	OPC(6, OPC_RESINFO,      resinfo),
	OPC(6, OPC_ATOMIC_ADD,     atomic.add),
	OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
	OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
	OPC(6, OPC_ATOMIC_INC,     atomic.inc),
	OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
	OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
	OPC(6, OPC_ATOMIC_MIN,     atomic.min),
	OPC(6, OPC_ATOMIC_MAX,     atomic.max),
	OPC(6, OPC_ATOMIC_AND,     atomic.and),
	OPC(6, OPC_ATOMIC_OR,      atomic.or),
	OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
	OPC(6, OPC_LDGB,         ldgb),
	OPC(6, OPC_STGB,         stgb),
	OPC(6, OPC_STIB,         stib),
	OPC(6, OPC_LDC,          ldc),
	OPC(6, OPC_LDLV,         ldlv),

	/* category 7: */
	OPC(7, OPC_BAR,          bar),
	OPC(7, OPC_FENCE,        fence),

#undef OPC
};
1061
/*
 * Look up the opcs[] entry for a decoded instruction, indexing the table
 * with (opc_cat << NOPC_BITS) | opcode.
 *
 * NOTE: the expansion refers to a variable named `ctx` (for ctx->gpu_id)
 * that is not a macro parameter, so it can only be used where a
 * `struct disasm_ctx *ctx` is in scope.
 */
#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
1063
1064 // XXX hack.. probably should move this table somewhere common:
1065 #include "ir3.h"
1066 const char *ir3_instr_name(struct ir3_instruction *instr)
1067 {
1068 if (opc_cat(instr->opc) == -1) return "??meta??";
1069 return opcs[instr->opc].name;
1070 }
1071
1072 static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
1073 {
1074 instr_t *instr = (instr_t *)dwords;
1075 uint32_t opc = instr_opc(instr, ctx->gpu_id);
1076 const char *name;
1077
1078 if (debug & PRINT_VERBOSE)
1079 fprintf(ctx->out, "%s%04d[%08xx_%08xx] ", levels[ctx->level], n, dwords[1], dwords[0]);
1080
1081 /* NOTE: order flags are printed is a bit fugly.. but for now I
1082 * try to match the order in llvm-a3xx disassembler for easy
1083 * diff'ing..
1084 */
1085
1086 ctx->repeat = instr_repeat(instr);
1087
1088 if (instr->sync)
1089 fprintf(ctx->out, "(sy)");
1090 if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7)))
1091 fprintf(ctx->out, "(ss)");
1092 if (instr->jmp_tgt)
1093 fprintf(ctx->out, "(jp)");
1094 if (instr_sat(instr))
1095 fprintf(ctx->out, "(sat)");
1096 if (ctx->repeat) {
1097 fprintf(ctx->out, "(rpt%d)", ctx->repeat);
1098 } else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) {
1099 unsigned nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
1100 fprintf(ctx->out, "(nop%d)", nop);
1101 } else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) {
1102 unsigned nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
1103 fprintf(ctx->out, "(nop%d)", nop);
1104 }
1105 if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
1106 fprintf(ctx->out, "(ul)");
1107
1108 name = GETINFO(instr)->name;
1109
1110 if (name) {
1111 fprintf(ctx->out, "%s", name);
1112 GETINFO(instr)->print(ctx, instr);
1113 } else {
1114 fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc);
1115 }
1116
1117 fprintf(ctx->out, "\n");
1118
1119 return (instr->opc_cat == 0) && (opc == OPC_END);
1120 }
1121
1122 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
1123 {
1124 struct disasm_ctx ctx;
1125 int i;
1126
1127 assert((sizedwords % 2) == 0);
1128
1129 memset(&ctx, 0, sizeof(ctx));
1130 ctx.out = out;
1131 ctx.level = level;
1132 ctx.gpu_id = gpu_id;
1133
1134 for (i = 0; i < sizedwords; i += 2)
1135 print_instr(&ctx, &dwords[i], i/2);
1136
1137 return 0;
1138 }