218bdc3e17c216bd03123c0d6f63b16531c413b7
[mesa.git] / src / freedreno / decode / instr-a3xx.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
/* Attribute applied to every instruction-encoding struct/union in this
 * header, so that no padding is inserted between their members.
 */
#define PACKED __attribute__((__packed__))
28
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
32
/* Optional hook called by ir3_assert() when a check fails.  It is declared
 * weak, so ir3_assert() tests whether it is present before calling it, and
 * noreturn, so control is not expected to come back from it.
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
		const char *func) __attribute__((weak, __noreturn__));

/* assert() wrapper whose failure handling can be overridden.  Tools such as
 * crashdec may attempt to disassemble memory that does not actually hold
 * valid instructions, and want to deal with that themselves.
 *
 * On failure ir3_assert_handler() is called if it is present; otherwise
 * the failure is passed on to the regular assert().  Note that expr is
 * evaluated a second time by that assert().
 */
#define ir3_assert(expr) do {                                               \
		if (!(expr)) {                                                      \
			if (ir3_assert_handler)                                         \
				ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);    \
			assert(expr);                                                   \
		}                                                                   \
	} while (0)
49
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode into a single
 * value: the category goes in the bits above NOPC_BITS, the opcode in the
 * low NOPC_BITS bits.  Both arguments are parenthesized so that arbitrary
 * expressions (e.g. a conditional expression) can be passed safely.
 */
#define _OPC(cat, opc)   (((cat) << NOPC_BITS) | (opc))
54
55 typedef enum {
56 /* category 0: */
57 OPC_NOP = _OPC(0, 0),
58 OPC_B = _OPC(0, 1),
59 OPC_JUMP = _OPC(0, 2),
60 OPC_CALL = _OPC(0, 3),
61 OPC_RET = _OPC(0, 4),
62 OPC_KILL = _OPC(0, 5),
63 OPC_END = _OPC(0, 6),
64 OPC_EMIT = _OPC(0, 7),
65 OPC_CUT = _OPC(0, 8),
66 OPC_CHMASK = _OPC(0, 9),
67 OPC_CHSH = _OPC(0, 10),
68 OPC_FLOW_REV = _OPC(0, 11),
69
70 OPC_BKT = _OPC(0, 16),
71 OPC_STKS = _OPC(0, 17),
72 OPC_STKR = _OPC(0, 18),
73 OPC_XSET = _OPC(0, 19),
74 OPC_XCLR = _OPC(0, 20),
75 OPC_GETONE = _OPC(0, 21),
76 OPC_DBG = _OPC(0, 22),
77 OPC_SHPS = _OPC(0, 23), /* shader prologue start */
78 OPC_SHPE = _OPC(0, 24), /* shader prologue end */
79
80 OPC_PREDT = _OPC(0, 29), /* predicated true */
81 OPC_PREDF = _OPC(0, 30), /* predicated false */
82 OPC_PREDE = _OPC(0, 31), /* predicated end */
83
84 /* category 1: */
85 OPC_MOV = _OPC(1, 0),
86
87 /* category 2: */
88 OPC_ADD_F = _OPC(2, 0),
89 OPC_MIN_F = _OPC(2, 1),
90 OPC_MAX_F = _OPC(2, 2),
91 OPC_MUL_F = _OPC(2, 3),
92 OPC_SIGN_F = _OPC(2, 4),
93 OPC_CMPS_F = _OPC(2, 5),
94 OPC_ABSNEG_F = _OPC(2, 6),
95 OPC_CMPV_F = _OPC(2, 7),
96 /* 8 - invalid */
97 OPC_FLOOR_F = _OPC(2, 9),
98 OPC_CEIL_F = _OPC(2, 10),
99 OPC_RNDNE_F = _OPC(2, 11),
100 OPC_RNDAZ_F = _OPC(2, 12),
101 OPC_TRUNC_F = _OPC(2, 13),
102 /* 14-15 - invalid */
103 OPC_ADD_U = _OPC(2, 16),
104 OPC_ADD_S = _OPC(2, 17),
105 OPC_SUB_U = _OPC(2, 18),
106 OPC_SUB_S = _OPC(2, 19),
107 OPC_CMPS_U = _OPC(2, 20),
108 OPC_CMPS_S = _OPC(2, 21),
109 OPC_MIN_U = _OPC(2, 22),
110 OPC_MIN_S = _OPC(2, 23),
111 OPC_MAX_U = _OPC(2, 24),
112 OPC_MAX_S = _OPC(2, 25),
113 OPC_ABSNEG_S = _OPC(2, 26),
114 /* 27 - invalid */
115 OPC_AND_B = _OPC(2, 28),
116 OPC_OR_B = _OPC(2, 29),
117 OPC_NOT_B = _OPC(2, 30),
118 OPC_XOR_B = _OPC(2, 31),
119 /* 32 - invalid */
120 OPC_CMPV_U = _OPC(2, 33),
121 OPC_CMPV_S = _OPC(2, 34),
122 /* 35-47 - invalid */
123 OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
124 OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
125 OPC_MULL_U = _OPC(2, 50),
126 OPC_BFREV_B = _OPC(2, 51),
127 OPC_CLZ_S = _OPC(2, 52),
128 OPC_CLZ_B = _OPC(2, 53),
129 OPC_SHL_B = _OPC(2, 54),
130 OPC_SHR_B = _OPC(2, 55),
131 OPC_ASHR_B = _OPC(2, 56),
132 OPC_BARY_F = _OPC(2, 57),
133 OPC_MGEN_B = _OPC(2, 58),
134 OPC_GETBIT_B = _OPC(2, 59),
135 OPC_SETRM = _OPC(2, 60),
136 OPC_CBITS_B = _OPC(2, 61),
137 OPC_SHB = _OPC(2, 62),
138 OPC_MSAD = _OPC(2, 63),
139
140 /* category 3: */
141 OPC_MAD_U16 = _OPC(3, 0),
142 OPC_MADSH_U16 = _OPC(3, 1),
143 OPC_MAD_S16 = _OPC(3, 2),
144 OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
145 OPC_MAD_U24 = _OPC(3, 4),
146 OPC_MAD_S24 = _OPC(3, 5),
147 OPC_MAD_F16 = _OPC(3, 6),
148 OPC_MAD_F32 = _OPC(3, 7),
149 OPC_SEL_B16 = _OPC(3, 8),
150 OPC_SEL_B32 = _OPC(3, 9),
151 OPC_SEL_S16 = _OPC(3, 10),
152 OPC_SEL_S32 = _OPC(3, 11),
153 OPC_SEL_F16 = _OPC(3, 12),
154 OPC_SEL_F32 = _OPC(3, 13),
155 OPC_SAD_S16 = _OPC(3, 14),
156 OPC_SAD_S32 = _OPC(3, 15),
157
158 /* category 4: */
159 OPC_RCP = _OPC(4, 0),
160 OPC_RSQ = _OPC(4, 1),
161 OPC_LOG2 = _OPC(4, 2),
162 OPC_EXP2 = _OPC(4, 3),
163 OPC_SIN = _OPC(4, 4),
164 OPC_COS = _OPC(4, 5),
165 OPC_SQRT = _OPC(4, 6),
166 /* NOTE that these are 8+opc from their highp equivs, so it's possible
167 * that the high order bit in the opc field has been repurposed for
168 * half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
169 * still use the same opc as highp
170 */
171 OPC_HRSQ = _OPC(4, 9),
172 OPC_HLOG2 = _OPC(4, 10),
173 OPC_HEXP2 = _OPC(4, 11),
174
175 /* category 5: */
176 OPC_ISAM = _OPC(5, 0),
177 OPC_ISAML = _OPC(5, 1),
178 OPC_ISAMM = _OPC(5, 2),
179 OPC_SAM = _OPC(5, 3),
180 OPC_SAMB = _OPC(5, 4),
181 OPC_SAML = _OPC(5, 5),
182 OPC_SAMGQ = _OPC(5, 6),
183 OPC_GETLOD = _OPC(5, 7),
184 OPC_CONV = _OPC(5, 8),
185 OPC_CONVM = _OPC(5, 9),
186 OPC_GETSIZE = _OPC(5, 10),
187 OPC_GETBUF = _OPC(5, 11),
188 OPC_GETPOS = _OPC(5, 12),
189 OPC_GETINFO = _OPC(5, 13),
190 OPC_DSX = _OPC(5, 14),
191 OPC_DSY = _OPC(5, 15),
192 OPC_GATHER4R = _OPC(5, 16),
193 OPC_GATHER4G = _OPC(5, 17),
194 OPC_GATHER4B = _OPC(5, 18),
195 OPC_GATHER4A = _OPC(5, 19),
196 OPC_SAMGP0 = _OPC(5, 20),
197 OPC_SAMGP1 = _OPC(5, 21),
198 OPC_SAMGP2 = _OPC(5, 22),
199 OPC_SAMGP3 = _OPC(5, 23),
200 OPC_DSXPP_1 = _OPC(5, 24),
201 OPC_DSYPP_1 = _OPC(5, 25),
202 OPC_RGETPOS = _OPC(5, 26),
203 OPC_RGETINFO = _OPC(5, 27),
204
205 /* category 6: */
206 OPC_LDG = _OPC(6, 0), /* load-global */
207 OPC_LDL = _OPC(6, 1),
208 OPC_LDP = _OPC(6, 2),
209 OPC_STG = _OPC(6, 3), /* store-global */
210 OPC_STL = _OPC(6, 4),
211 OPC_STP = _OPC(6, 5),
212 OPC_LDIB = _OPC(6, 6),
213 OPC_G2L = _OPC(6, 7),
214 OPC_L2G = _OPC(6, 8),
215 OPC_PREFETCH = _OPC(6, 9),
216 OPC_LDLW = _OPC(6, 10),
217 OPC_STLW = _OPC(6, 11),
218 OPC_RESFMT = _OPC(6, 14),
219 OPC_RESINFO = _OPC(6, 15),
220 OPC_ATOMIC_ADD = _OPC(6, 16),
221 OPC_ATOMIC_SUB = _OPC(6, 17),
222 OPC_ATOMIC_XCHG = _OPC(6, 18),
223 OPC_ATOMIC_INC = _OPC(6, 19),
224 OPC_ATOMIC_DEC = _OPC(6, 20),
225 OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
226 OPC_ATOMIC_MIN = _OPC(6, 22),
227 OPC_ATOMIC_MAX = _OPC(6, 23),
228 OPC_ATOMIC_AND = _OPC(6, 24),
229 OPC_ATOMIC_OR = _OPC(6, 25),
230 OPC_ATOMIC_XOR = _OPC(6, 26),
231 OPC_LDGB = _OPC(6, 27),
232 OPC_STGB = _OPC(6, 28),
233 OPC_STIB = _OPC(6, 29),
234 OPC_LDC = _OPC(6, 30),
235 OPC_LDLV = _OPC(6, 31),
236
237 /* category 7: */
238 OPC_BAR = _OPC(7, 0),
239 OPC_FENCE = _OPC(7, 1),
240 } opc_t;
241
/* Split a combined opcode value (see _OPC()) back into its two parts:
 *   opc_cat() - the instruction category (bits above NOPC_BITS)
 *   opc_op()  - the opcode within that category (low NOPC_BITS bits)
 */
#define opc_cat(opc)   ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)    ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
244
/* Data type codes: F = float, U = unsigned int, S = signed int, followed
 * by the size in bits (see type_size(), type_float(), type_uint() and
 * type_sint()).
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX assumed, not confirmed */
} type_t;
255
256 static inline uint32_t type_size(type_t type)
257 {
258 switch (type) {
259 case TYPE_F32:
260 case TYPE_U32:
261 case TYPE_S32:
262 return 32;
263 case TYPE_F16:
264 case TYPE_U16:
265 case TYPE_S16:
266 return 16;
267 case TYPE_U8:
268 case TYPE_S8:
269 return 8;
270 default:
271 ir3_assert(0); /* invalid type */
272 return 0;
273 }
274 }
275
276 static inline int type_float(type_t type)
277 {
278 return (type == TYPE_F32) || (type == TYPE_F16);
279 }
280
281 static inline int type_uint(type_t type)
282 {
283 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
284 }
285
286 static inline int type_sint(type_t type)
287 {
288 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
289 }
290
291 typedef union PACKED {
292 /* normal gpr or const src register: */
293 struct PACKED {
294 uint32_t comp : 2;
295 uint32_t num : 10;
296 };
297 /* for immediate val: */
298 int32_t iim_val : 11;
299 /* to make compiler happy: */
300 uint32_t dummy32;
301 uint32_t dummy10 : 10;
302 int32_t idummy10 : 10;
303 uint32_t dummy11 : 11;
304 uint32_t dummy12 : 12;
305 uint32_t dummy13 : 13;
306 uint32_t dummy8 : 8;
307 int32_t idummy13 : 13;
308 int32_t idummy8 : 8;
309 } reg_t;
310
/* Register numbers (reg_t::num) of the special registers, see
 * reg_special():
 */
#define REG_A0 61   /* address register */
#define REG_P0 62   /* predicate register */
314
315 static inline int reg_special(reg_t reg)
316 {
317 return (reg.num == REG_A0) || (reg.num == REG_P0);
318 }
319
/* Branch types, as stored in instr_cat0_t::brtype.  The comment on each
 * value gives the corresponding mnemonic.
 */
typedef enum {
	BRANCH_PLAIN = 0,  /* br */
	BRANCH_OR    = 1,  /* brao */
	BRANCH_AND   = 2,  /* braa */
	BRANCH_CONST = 3,  /* brac */
	BRANCH_ANY   = 4,  /* bany */
	BRANCH_ALL   = 5,  /* ball */
	BRANCH_X     = 6,  /* brax ??? */
} brtype_t;
329
330 typedef struct PACKED {
331 /* dword0: */
332 union PACKED {
333 struct PACKED {
334 int16_t immed : 16;
335 uint32_t dummy1 : 16;
336 } a3xx;
337 struct PACKED {
338 int32_t immed : 20;
339 uint32_t dummy1 : 12;
340 } a4xx;
341 struct PACKED {
342 int32_t immed : 32;
343 } a5xx;
344 };
345
346 /* dword1: */
347 uint32_t idx : 5; /* brac.N index */
348 uint32_t brtype : 3; /* branch type, see brtype_t */
349 uint32_t repeat : 3;
350 uint32_t dummy3 : 1;
351 uint32_t ss : 1;
352 uint32_t inv1 : 1;
353 uint32_t comp1 : 2;
354 uint32_t eq : 1;
355 uint32_t opc_hi : 1; /* at least one bit */
356 uint32_t dummy4 : 2;
357 uint32_t inv0 : 1;
358 uint32_t comp0 : 2; /* component for first src */
359 uint32_t opc : 4;
360 uint32_t jmp_tgt : 1;
361 uint32_t sync : 1;
362 uint32_t opc_cat : 3;
363 } instr_cat0_t;
364
365 typedef struct PACKED {
366 /* dword0: */
367 union PACKED {
368 /* for normal src register: */
369 struct PACKED {
370 uint32_t src : 11;
371 /* at least low bit of pad must be zero or it will
372 * look like a address relative src
373 */
374 uint32_t pad : 21;
375 };
376 /* for address relative: */
377 struct PACKED {
378 int32_t off : 10;
379 uint32_t src_rel_c : 1;
380 uint32_t src_rel : 1;
381 uint32_t unknown : 20;
382 };
383 /* for immediate: */
384 int32_t iim_val;
385 uint32_t uim_val;
386 float fim_val;
387 };
388
389 /* dword1: */
390 uint32_t dst : 8;
391 uint32_t repeat : 3;
392 uint32_t src_r : 1;
393 uint32_t ss : 1;
394 uint32_t ul : 1;
395 uint32_t dst_type : 3;
396 uint32_t dst_rel : 1;
397 uint32_t src_type : 3;
398 uint32_t src_c : 1;
399 uint32_t src_im : 1;
400 uint32_t even : 1;
401 uint32_t pos_inf : 1;
402 uint32_t must_be_0 : 2;
403 uint32_t jmp_tgt : 1;
404 uint32_t sync : 1;
405 uint32_t opc_cat : 3;
406 } instr_cat1_t;
407
408 typedef struct PACKED {
409 /* dword0: */
410 union PACKED {
411 struct PACKED {
412 uint32_t src1 : 11;
413 uint32_t must_be_zero1: 2;
414 uint32_t src1_im : 1; /* immediate */
415 uint32_t src1_neg : 1; /* negate */
416 uint32_t src1_abs : 1; /* absolute value */
417 };
418 struct PACKED {
419 uint32_t src1 : 10;
420 uint32_t src1_c : 1; /* relative-const */
421 uint32_t src1_rel : 1; /* relative address */
422 uint32_t must_be_zero : 1;
423 uint32_t dummy : 3;
424 } rel1;
425 struct PACKED {
426 uint32_t src1 : 12;
427 uint32_t src1_c : 1; /* const */
428 uint32_t dummy : 3;
429 } c1;
430 };
431
432 union PACKED {
433 struct PACKED {
434 uint32_t src2 : 11;
435 uint32_t must_be_zero2: 2;
436 uint32_t src2_im : 1; /* immediate */
437 uint32_t src2_neg : 1; /* negate */
438 uint32_t src2_abs : 1; /* absolute value */
439 };
440 struct PACKED {
441 uint32_t src2 : 10;
442 uint32_t src2_c : 1; /* relative-const */
443 uint32_t src2_rel : 1; /* relative address */
444 uint32_t must_be_zero : 1;
445 uint32_t dummy : 3;
446 } rel2;
447 struct PACKED {
448 uint32_t src2 : 12;
449 uint32_t src2_c : 1; /* const */
450 uint32_t dummy : 3;
451 } c2;
452 };
453
454 /* dword1: */
455 uint32_t dst : 8;
456 uint32_t repeat : 2;
457 uint32_t sat : 1;
458 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
459 uint32_t ss : 1;
460 uint32_t ul : 1; /* dunno */
461 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
462 uint32_t ei : 1;
463 uint32_t cond : 3;
464 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
465 uint32_t full : 1; /* not half */
466 uint32_t opc : 6;
467 uint32_t jmp_tgt : 1;
468 uint32_t sync : 1;
469 uint32_t opc_cat : 3;
470 } instr_cat2_t;
471
472 typedef struct PACKED {
473 /* dword0: */
474 union PACKED {
475 struct PACKED {
476 uint32_t src1 : 11;
477 uint32_t must_be_zero1: 2;
478 uint32_t src2_c : 1;
479 uint32_t src1_neg : 1;
480 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
481 };
482 struct PACKED {
483 uint32_t src1 : 10;
484 uint32_t src1_c : 1;
485 uint32_t src1_rel : 1;
486 uint32_t must_be_zero : 1;
487 uint32_t dummy : 3;
488 } rel1;
489 struct PACKED {
490 uint32_t src1 : 12;
491 uint32_t src1_c : 1;
492 uint32_t dummy : 3;
493 } c1;
494 };
495
496 union PACKED {
497 struct PACKED {
498 uint32_t src3 : 11;
499 uint32_t must_be_zero2: 2;
500 uint32_t src3_r : 1;
501 uint32_t src2_neg : 1;
502 uint32_t src3_neg : 1;
503 };
504 struct PACKED {
505 uint32_t src3 : 10;
506 uint32_t src3_c : 1;
507 uint32_t src3_rel : 1;
508 uint32_t must_be_zero : 1;
509 uint32_t dummy : 3;
510 } rel2;
511 struct PACKED {
512 uint32_t src3 : 12;
513 uint32_t src3_c : 1;
514 uint32_t dummy : 3;
515 } c2;
516 };
517
518 /* dword1: */
519 uint32_t dst : 8;
520 uint32_t repeat : 2;
521 uint32_t sat : 1;
522 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
523 uint32_t ss : 1;
524 uint32_t ul : 1;
525 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
526 uint32_t src2 : 8;
527 uint32_t opc : 4;
528 uint32_t jmp_tgt : 1;
529 uint32_t sync : 1;
530 uint32_t opc_cat : 3;
531 } instr_cat3_t;
532
533 static inline bool instr_cat3_full(instr_cat3_t *cat3)
534 {
535 switch (_OPC(3, cat3->opc)) {
536 case OPC_MAD_F16:
537 case OPC_MAD_U16:
538 case OPC_MAD_S16:
539 case OPC_SEL_B16:
540 case OPC_SEL_S16:
541 case OPC_SEL_F16:
542 case OPC_SAD_S16:
543 case OPC_SAD_S32: // really??
544 return false;
545 default:
546 return true;
547 }
548 }
549
550 typedef struct PACKED {
551 /* dword0: */
552 union PACKED {
553 struct PACKED {
554 uint32_t src : 11;
555 uint32_t must_be_zero1: 2;
556 uint32_t src_im : 1; /* immediate */
557 uint32_t src_neg : 1; /* negate */
558 uint32_t src_abs : 1; /* absolute value */
559 };
560 struct PACKED {
561 uint32_t src : 10;
562 uint32_t src_c : 1; /* relative-const */
563 uint32_t src_rel : 1; /* relative address */
564 uint32_t must_be_zero : 1;
565 uint32_t dummy : 3;
566 } rel;
567 struct PACKED {
568 uint32_t src : 12;
569 uint32_t src_c : 1; /* const */
570 uint32_t dummy : 3;
571 } c;
572 };
573 uint32_t dummy1 : 16; /* seem to be ignored */
574
575 /* dword1: */
576 uint32_t dst : 8;
577 uint32_t repeat : 2;
578 uint32_t sat : 1;
579 uint32_t src_r : 1;
580 uint32_t ss : 1;
581 uint32_t ul : 1;
582 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
583 uint32_t dummy2 : 5; /* seem to be ignored */
584 uint32_t full : 1; /* not half */
585 uint32_t opc : 6;
586 uint32_t jmp_tgt : 1;
587 uint32_t sync : 1;
588 uint32_t opc_cat : 3;
589 } instr_cat4_t;
590
/* When is_s2en_bindless = 1 in a cat5 instruction, this mode (the
 * desc_mode field) determines whether bindless is enabled and, if so, how
 * the (base, index) pair is obtained for both sampler and texture.  There
 * is a single base embedded in the instruction, which is always used for
 * the texture.
 */
typedef enum {
	/* Traditional GL binding model: texture and sampler index come from
	 * src3, which is not presumed to be uniform.  This is
	 * backwards-compatible with earlier generations, where this field
	 * was always 0 and nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is *not* presumed to be
	 * uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Traditional GL binding model: texture and sampler index come from
	 * src3, which is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* Texture and sampler share the same base; the sampler index is
	 * taken from the low 4 bits of src3 and the texture index from the
	 * high 4 bits.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* Sampler base comes from the low 3 bits of a1.x, and the texture
	 * index from the next 8 bits of a1.x.  The sampler index is an
	 * immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
641
642 typedef struct PACKED {
643 /* dword0: */
644 union PACKED {
645 /* normal case: */
646 struct PACKED {
647 uint32_t full : 1; /* not half */
648 uint32_t src1 : 8;
649 uint32_t src2 : 8;
650 uint32_t dummy1 : 4; /* seem to be ignored */
651 uint32_t samp : 4;
652 uint32_t tex : 7;
653 } norm;
654 /* s2en case: */
655 struct PACKED {
656 uint32_t full : 1; /* not half */
657 uint32_t src1 : 8;
658 uint32_t src2 : 8;
659 uint32_t dummy1 : 2;
660 uint32_t base_hi : 2;
661 uint32_t src3 : 8;
662 uint32_t desc_mode : 3;
663 } s2en_bindless;
664 /* same in either case: */
665 // XXX I think, confirm this
666 struct PACKED {
667 uint32_t full : 1; /* not half */
668 uint32_t src1 : 8;
669 uint32_t src2 : 8;
670 uint32_t pad : 15;
671 };
672 };
673
674 /* dword1: */
675 uint32_t dst : 8;
676 uint32_t wrmask : 4; /* write-mask */
677 uint32_t type : 3;
678 uint32_t base_lo : 1; /* used with bindless */
679 uint32_t is_3d : 1;
680
681 uint32_t is_a : 1;
682 uint32_t is_s : 1;
683 uint32_t is_s2en_bindless : 1;
684 uint32_t is_o : 1;
685 uint32_t is_p : 1;
686
687 uint32_t opc : 5;
688 uint32_t jmp_tgt : 1;
689 uint32_t sync : 1;
690 uint32_t opc_cat : 3;
691 } instr_cat5_t;
692
/* cat6 dword0 layout used when src_off is set: [src1 + off], src2.
 * dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1 : 1;
	int32_t  off     : 13;
	uint32_t src1    : 8;
	uint32_t src1_im : 1;
	uint32_t src2_im : 1;
	uint32_t src2    : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
706
/* cat6 dword0 layout used when src_off is not set: [src1], src2.
 * dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0 : 1;
	uint32_t src1    : 13;
	uint32_t ignore0 : 8;
	uint32_t src1_im : 1;
	uint32_t src2_im : 1;
	uint32_t src2    : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
720
/* cat6 dword1 layout used when dst_off is set.  dword0 is left opaque
 * here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1:
	 *
	 * note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in the blob,
	 * since it is used even if !cat6->src_off.  It would make sense for
	 * there to be some more bits to bring us to 11 bits worth of offset,
	 * but not sure..
	 */
	int32_t  off     : 8;
	uint32_t mustbe1 : 1;
	uint32_t dst     : 8;
	uint32_t pad1    : 15;
} instr_cat6c_t;
737
/* cat6 dword1 layout used when dst_off is not set.  dword0 is left opaque
 * here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst     : 8;
	uint32_t mustbe0 : 1;
	uint32_t idx     : 8;
	uint32_t pad0    : 15;
} instr_cat6d_t;
748
/* cat6 layout for ldgb and the atomics.  Observed pad values:
 *
 *   ldgb:       pad0=0, pad3=1
 *   atomic .g:  pad0=1, pad3=1
 *          .l:  pad0=1, pad3=0
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0      : 1;
	uint32_t src3      : 8;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t src1      : 8;
	uint32_t src1_im   : 1;
	uint32_t src2_im   : 1;
	uint32_t src2      : 8;

	/* dword1: */
	uint32_t dst       : 8;
	uint32_t mustbe0   : 1;
	uint32_t src_ssbo  : 8;
	uint32_t pad2      : 3;   /* type */
	uint32_t g         : 1;
	uint32_t pad3      : 1;
	uint32_t pad4      : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6ldgb_t;
776
/* cat6 layout for stgb.  Observed pad values: pad0=0, pad3=2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1   : 1;   /* ??? */
	uint32_t src1      : 8;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t pad0      : 9;
	uint32_t src2_im   : 1;
	uint32_t src2      : 8;

	/* dword1: */
	uint32_t src3      : 8;
	uint32_t src3_im   : 1;
	uint32_t dst_ssbo  : 8;
	uint32_t pad2      : 3;   /* type */
	uint32_t pad3      : 2;
	uint32_t pad4      : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6stgb_t;
798
799 typedef union PACKED {
800 instr_cat6a_t a;
801 instr_cat6b_t b;
802 instr_cat6c_t c;
803 instr_cat6d_t d;
804 instr_cat6ldgb_t ldgb;
805 instr_cat6stgb_t stgb;
806 struct PACKED {
807 /* dword0: */
808 uint32_t src_off : 1;
809 uint32_t pad1 : 31;
810
811 /* dword1: */
812 uint32_t pad2 : 8;
813 uint32_t dst_off : 1;
814 uint32_t pad3 : 8;
815 uint32_t type : 3;
816 uint32_t g : 1; /* or in some cases it means dst immed */
817 uint32_t pad4 : 1;
818 uint32_t opc : 5;
819 uint32_t jmp_tgt : 1;
820 uint32_t sync : 1;
821 uint32_t opc_cat : 3;
822 };
823 } instr_cat6_t;
824
/* Similar to cat5_desc_mode_t: describes how the descriptor is loaded
 * (the desc_mode field of instr_cat6_a6xx_t).
 */
typedef enum {
	/* Old GL binding model, with an immediate index. */
	CAT6_IMM = 0,

	CAT6_UNIFORM = 1,

	CAT6_NONUNIFORM = 2,

	/* Bindless model, with an immediate index. */
	CAT6_BINDLESS_IMM = 4,

	/* Bindless model, with a uniform register index. */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Bindless model, with a register index that isn't guaranteed to be
	 * uniform.  This presumably checks if the indices are equal and
	 * splits up the load/store, because it works the way you would
	 * expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
850
/**
 * New (a6xx) category 6 encoding.  Observed pad values and operand usage:
 *
 * Atomic ops (which return a value):
 *    pad1=1, pad3=c, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually the dest register
 *    src2.y  - is 'data', except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * stib (which does not return a value):
 *    pad1=0, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * ldib:
 *    pad1=1, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *
 * ldc (load from UBO using descriptor):
 *    pad1=0, pad3=8, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 * NOTE(review): that last statement conflicts with the non-zero pad5
 * values listed above; possibly a different pad field was meant - confirm.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1      : 1;
	uint32_t base      : 3;
	uint32_t pad2      : 2;
	uint32_t desc_mode : 3;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t opc       : 5;
	uint32_t pad3      : 5;
	uint32_t src1      : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2      : 8;  /* or the dst for load instructions */
	uint32_t pad4      : 1;  /* mustbe0 ?? */
	uint32_t ssbo      : 8;  /* ssbo/image binding point */
	uint32_t type      : 3;
	uint32_t pad5      : 7;
	uint32_t jmp_tgt   : 1;
	uint32_t sync      : 1;
	uint32_t opc_cat   : 3;
} instr_cat6_a6xx_t;
897
/* Encoding of a category 7 instruction (two dwords); dword0 is entirely
 * padding.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1    : 32;

	/* dword1: */
	uint32_t pad2    : 12;
	uint32_t ss      : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3    : 6;
	uint32_t w       : 1;  /* write */
	uint32_t r       : 1;  /* read */
	uint32_t l       : 1;  /* local */
	uint32_t g       : 1;  /* global */
	uint32_t opc     : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt : 1;  /* (jp) */
	uint32_t sync    : 1;  /* (sy) */
	uint32_t opc_cat : 3;
} instr_cat7_t;
915
916 typedef union PACKED {
917 instr_cat0_t cat0;
918 instr_cat1_t cat1;
919 instr_cat2_t cat2;
920 instr_cat3_t cat3;
921 instr_cat4_t cat4;
922 instr_cat5_t cat5;
923 instr_cat6_t cat6;
924 instr_cat6_a6xx_t cat6_a6xx;
925 instr_cat7_t cat7;
926 struct PACKED {
927 /* dword0: */
928 uint32_t pad1 : 32;
929
930 /* dword1: */
931 uint32_t pad2 : 12;
932 uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
933 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
934 uint32_t pad3 : 13;
935 uint32_t jmp_tgt : 1;
936 uint32_t sync : 1;
937 uint32_t opc_cat : 3;
938
939 };
940 } instr_t;
941
942 static inline uint32_t instr_repeat(instr_t *instr)
943 {
944 switch (instr->opc_cat) {
945 case 0: return instr->cat0.repeat;
946 case 1: return instr->cat1.repeat;
947 case 2: return instr->cat2.repeat;
948 case 3: return instr->cat3.repeat;
949 case 4: return instr->cat4.repeat;
950 default: return 0;
951 }
952 }
953
954 static inline bool instr_sat(instr_t *instr)
955 {
956 switch (instr->opc_cat) {
957 case 2: return instr->cat2.sat;
958 case 3: return instr->cat3.sat;
959 case 4: return instr->cat4.sat;
960 default: return false;
961 }
962 }
963
964 /* We can probably drop the gpu_id arg, but keeping it for now so we can
965 * assert if we see something we think should be new encoding on an older
966 * gpu.
967 */
968 static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
969 {
970 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
971
972 /* At least one of these two bits is pad in all the possible
973 * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
974 * cmdstream traces I have indicates that the pad bit is zero
975 * in all cases. So we can use this to detect new encoding:
976 */
977 if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
978 ir3_assert(gpu_id >= 600);
979 ir3_assert(instr->cat6.opc == 0);
980 return false;
981 }
982
983 return true;
984 }
985
986 static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
987 {
988 switch (instr->opc_cat) {
989 case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4;
990 case 1: return 0;
991 case 2: return instr->cat2.opc;
992 case 3: return instr->cat3.opc;
993 case 4: return instr->cat4.opc;
994 case 5: return instr->cat5.opc;
995 case 6:
996 if (!is_cat6_legacy(instr, gpu_id))
997 return instr->cat6_a6xx.opc;
998 return instr->cat6.opc;
999 case 7: return instr->cat7.opc;
1000 default: return 0;
1001 }
1002 }
1003
1004 static inline bool is_mad(opc_t opc)
1005 {
1006 switch (opc) {
1007 case OPC_MAD_U16:
1008 case OPC_MAD_S16:
1009 case OPC_MAD_U24:
1010 case OPC_MAD_S24:
1011 case OPC_MAD_F16:
1012 case OPC_MAD_F32:
1013 return true;
1014 default:
1015 return false;
1016 }
1017 }
1018
1019 static inline bool is_madsh(opc_t opc)
1020 {
1021 switch (opc) {
1022 case OPC_MADSH_U16:
1023 case OPC_MADSH_M16:
1024 return true;
1025 default:
1026 return false;
1027 }
1028 }
1029
1030 static inline bool is_atomic(opc_t opc)
1031 {
1032 switch (opc) {
1033 case OPC_ATOMIC_ADD:
1034 case OPC_ATOMIC_SUB:
1035 case OPC_ATOMIC_XCHG:
1036 case OPC_ATOMIC_INC:
1037 case OPC_ATOMIC_DEC:
1038 case OPC_ATOMIC_CMPXCHG:
1039 case OPC_ATOMIC_MIN:
1040 case OPC_ATOMIC_MAX:
1041 case OPC_ATOMIC_AND:
1042 case OPC_ATOMIC_OR:
1043 case OPC_ATOMIC_XOR:
1044 return true;
1045 default:
1046 return false;
1047 }
1048 }
1049
1050 static inline bool is_ssbo(opc_t opc)
1051 {
1052 switch (opc) {
1053 case OPC_RESFMT:
1054 case OPC_RESINFO:
1055 case OPC_LDGB:
1056 case OPC_STGB:
1057 case OPC_STIB:
1058 return true;
1059 default:
1060 return false;
1061 }
1062 }
1063
1064 static inline bool is_isam(opc_t opc)
1065 {
1066 switch (opc) {
1067 case OPC_ISAM:
1068 case OPC_ISAML:
1069 case OPC_ISAMM:
1070 return true;
1071 default:
1072 return false;
1073 }
1074 }
1075
1076
1077 static inline bool is_cat2_float(opc_t opc)
1078 {
1079 switch (opc) {
1080 case OPC_ADD_F:
1081 case OPC_MIN_F:
1082 case OPC_MAX_F:
1083 case OPC_MUL_F:
1084 case OPC_SIGN_F:
1085 case OPC_CMPS_F:
1086 case OPC_ABSNEG_F:
1087 case OPC_CMPV_F:
1088 case OPC_FLOOR_F:
1089 case OPC_CEIL_F:
1090 case OPC_RNDNE_F:
1091 case OPC_RNDAZ_F:
1092 case OPC_TRUNC_F:
1093 return true;
1094
1095 default:
1096 return false;
1097 }
1098 }
1099
1100 static inline bool is_cat3_float(opc_t opc)
1101 {
1102 switch (opc) {
1103 case OPC_MAD_F16:
1104 case OPC_MAD_F32:
1105 case OPC_SEL_F16:
1106 case OPC_SEL_F32:
1107 return true;
1108 default:
1109 return false;
1110 }
1111 }
1112
/* Disassembler entry point; the definition lives outside this header.
 *
 * The prototype uses FILE, so this header must include <stdio.h> itself
 * rather than rely on every includer having pulled it in first.
 *
 * NOTE(review): presumably dwords/sizedwords describe the buffer of
 * instruction dwords, out is the stream the listing is written to, and
 * gpu_id selects the GPU generation (cf. is_cat6_legacy()); the meaning of
 * level and of the return value cannot be told from here - confirm against
 * the definition.
 */
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
1114
1115 #endif /* INSTR_A3XX_H_ */