freedreno/ir3: split out regmask
[mesa.git] / src / freedreno / ir3 / instr-a3xx.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
/* Shorthand for the GCC/Clang "packed" attribute.  It is applied to every
 * struct/union in this header so that the bitfield layouts are not padded
 * by the compiler.
 */
#define PACKED __attribute__((__packed__))
28
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdbool.h>
32 #include <assert.h>
33
/* Width, in bits, of the largest OPC field of all the instruction
 * categories.  An opc_t value keeps the per-category opcode number in its
 * low NOPC_BITS bits and the category number above them.
 */
#define NOPC_BITS 6

/* Combine a category number and a per-category opcode number into a single
 * opc_t value.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (e.g. a conditional) expands correctly.  The category is scaled with a
 * multiplication instead of `<<` because the meta instructions use
 * category -1, and left-shifting a negative value is undefined behaviour
 * in C; the multiplication yields the same value for every non-negative
 * category.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
38
39 typedef enum {
40 /* category 0: */
41 OPC_NOP = _OPC(0, 0),
42 OPC_B = _OPC(0, 1),
43 OPC_JUMP = _OPC(0, 2),
44 OPC_CALL = _OPC(0, 3),
45 OPC_RET = _OPC(0, 4),
46 OPC_KILL = _OPC(0, 5),
47 OPC_END = _OPC(0, 6),
48 OPC_EMIT = _OPC(0, 7),
49 OPC_CUT = _OPC(0, 8),
50 OPC_CHMASK = _OPC(0, 9),
51 OPC_CHSH = _OPC(0, 10),
52 OPC_FLOW_REV = _OPC(0, 11),
53
54 OPC_BKT = _OPC(0, 16),
55 OPC_STKS = _OPC(0, 17),
56 OPC_STKR = _OPC(0, 18),
57 OPC_XSET = _OPC(0, 19),
58 OPC_XCLR = _OPC(0, 20),
59 OPC_GETONE = _OPC(0, 21),
60 OPC_DBG = _OPC(0, 22),
61 OPC_SHPS = _OPC(0, 23), /* shader prologue start */
62 OPC_SHPE = _OPC(0, 24), /* shader prologue end */
63
64 OPC_PREDT = _OPC(0, 29), /* predicated true */
65 OPC_PREDF = _OPC(0, 30), /* predicated false */
66 OPC_PREDE = _OPC(0, 31), /* predicated end */
67
68 /* category 1: */
69 OPC_MOV = _OPC(1, 0),
70
71 /* category 2: */
72 OPC_ADD_F = _OPC(2, 0),
73 OPC_MIN_F = _OPC(2, 1),
74 OPC_MAX_F = _OPC(2, 2),
75 OPC_MUL_F = _OPC(2, 3),
76 OPC_SIGN_F = _OPC(2, 4),
77 OPC_CMPS_F = _OPC(2, 5),
78 OPC_ABSNEG_F = _OPC(2, 6),
79 OPC_CMPV_F = _OPC(2, 7),
80 /* 8 - invalid */
81 OPC_FLOOR_F = _OPC(2, 9),
82 OPC_CEIL_F = _OPC(2, 10),
83 OPC_RNDNE_F = _OPC(2, 11),
84 OPC_RNDAZ_F = _OPC(2, 12),
85 OPC_TRUNC_F = _OPC(2, 13),
86 /* 14-15 - invalid */
87 OPC_ADD_U = _OPC(2, 16),
88 OPC_ADD_S = _OPC(2, 17),
89 OPC_SUB_U = _OPC(2, 18),
90 OPC_SUB_S = _OPC(2, 19),
91 OPC_CMPS_U = _OPC(2, 20),
92 OPC_CMPS_S = _OPC(2, 21),
93 OPC_MIN_U = _OPC(2, 22),
94 OPC_MIN_S = _OPC(2, 23),
95 OPC_MAX_U = _OPC(2, 24),
96 OPC_MAX_S = _OPC(2, 25),
97 OPC_ABSNEG_S = _OPC(2, 26),
98 /* 27 - invalid */
99 OPC_AND_B = _OPC(2, 28),
100 OPC_OR_B = _OPC(2, 29),
101 OPC_NOT_B = _OPC(2, 30),
102 OPC_XOR_B = _OPC(2, 31),
103 /* 32 - invalid */
104 OPC_CMPV_U = _OPC(2, 33),
105 OPC_CMPV_S = _OPC(2, 34),
106 /* 35-47 - invalid */
107 OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
108 OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
109 OPC_MULL_U = _OPC(2, 50),
110 OPC_BFREV_B = _OPC(2, 51),
111 OPC_CLZ_S = _OPC(2, 52),
112 OPC_CLZ_B = _OPC(2, 53),
113 OPC_SHL_B = _OPC(2, 54),
114 OPC_SHR_B = _OPC(2, 55),
115 OPC_ASHR_B = _OPC(2, 56),
116 OPC_BARY_F = _OPC(2, 57),
117 OPC_MGEN_B = _OPC(2, 58),
118 OPC_GETBIT_B = _OPC(2, 59),
119 OPC_SETRM = _OPC(2, 60),
120 OPC_CBITS_B = _OPC(2, 61),
121 OPC_SHB = _OPC(2, 62),
122 OPC_MSAD = _OPC(2, 63),
123
124 /* category 3: */
125 OPC_MAD_U16 = _OPC(3, 0),
126 OPC_MADSH_U16 = _OPC(3, 1),
127 OPC_MAD_S16 = _OPC(3, 2),
128 OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
129 OPC_MAD_U24 = _OPC(3, 4),
130 OPC_MAD_S24 = _OPC(3, 5),
131 OPC_MAD_F16 = _OPC(3, 6),
132 OPC_MAD_F32 = _OPC(3, 7),
133 OPC_SEL_B16 = _OPC(3, 8),
134 OPC_SEL_B32 = _OPC(3, 9),
135 OPC_SEL_S16 = _OPC(3, 10),
136 OPC_SEL_S32 = _OPC(3, 11),
137 OPC_SEL_F16 = _OPC(3, 12),
138 OPC_SEL_F32 = _OPC(3, 13),
139 OPC_SAD_S16 = _OPC(3, 14),
140 OPC_SAD_S32 = _OPC(3, 15),
141
142 /* category 4: */
143 OPC_RCP = _OPC(4, 0),
144 OPC_RSQ = _OPC(4, 1),
145 OPC_LOG2 = _OPC(4, 2),
146 OPC_EXP2 = _OPC(4, 3),
147 OPC_SIN = _OPC(4, 4),
148 OPC_COS = _OPC(4, 5),
149 OPC_SQRT = _OPC(4, 6),
150 /* NOTE that these are 8+opc from their highp equivs, so it's possible
151 * that the high order bit in the opc field has been repurposed for
152 * half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
153 * still use the same opc as highp
154 */
155 OPC_HRSQ = _OPC(4, 9),
156 OPC_HLOG2 = _OPC(4, 10),
157 OPC_HEXP2 = _OPC(4, 11),
158
159 /* category 5: */
160 OPC_ISAM = _OPC(5, 0),
161 OPC_ISAML = _OPC(5, 1),
162 OPC_ISAMM = _OPC(5, 2),
163 OPC_SAM = _OPC(5, 3),
164 OPC_SAMB = _OPC(5, 4),
165 OPC_SAML = _OPC(5, 5),
166 OPC_SAMGQ = _OPC(5, 6),
167 OPC_GETLOD = _OPC(5, 7),
168 OPC_CONV = _OPC(5, 8),
169 OPC_CONVM = _OPC(5, 9),
170 OPC_GETSIZE = _OPC(5, 10),
171 OPC_GETBUF = _OPC(5, 11),
172 OPC_GETPOS = _OPC(5, 12),
173 OPC_GETINFO = _OPC(5, 13),
174 OPC_DSX = _OPC(5, 14),
175 OPC_DSY = _OPC(5, 15),
176 OPC_GATHER4R = _OPC(5, 16),
177 OPC_GATHER4G = _OPC(5, 17),
178 OPC_GATHER4B = _OPC(5, 18),
179 OPC_GATHER4A = _OPC(5, 19),
180 OPC_SAMGP0 = _OPC(5, 20),
181 OPC_SAMGP1 = _OPC(5, 21),
182 OPC_SAMGP2 = _OPC(5, 22),
183 OPC_SAMGP3 = _OPC(5, 23),
184 OPC_DSXPP_1 = _OPC(5, 24),
185 OPC_DSYPP_1 = _OPC(5, 25),
186 OPC_RGETPOS = _OPC(5, 26),
187 OPC_RGETINFO = _OPC(5, 27),
188 /* cat5 meta instructions, placed above the cat5 opc field's size */
189 OPC_DSXPP_MACRO = _OPC(5, 32),
190 OPC_DSYPP_MACRO = _OPC(5, 33),
191
192 /* category 6: */
193 OPC_LDG = _OPC(6, 0), /* load-global */
194 OPC_LDL = _OPC(6, 1),
195 OPC_LDP = _OPC(6, 2),
196 OPC_STG = _OPC(6, 3), /* store-global */
197 OPC_STL = _OPC(6, 4),
198 OPC_STP = _OPC(6, 5),
199 OPC_LDIB = _OPC(6, 6),
200 OPC_G2L = _OPC(6, 7),
201 OPC_L2G = _OPC(6, 8),
202 OPC_PREFETCH = _OPC(6, 9),
203 OPC_LDLW = _OPC(6, 10),
204 OPC_STLW = _OPC(6, 11),
205 OPC_RESFMT = _OPC(6, 14),
206 OPC_RESINFO = _OPC(6, 15),
207 OPC_ATOMIC_ADD = _OPC(6, 16),
208 OPC_ATOMIC_SUB = _OPC(6, 17),
209 OPC_ATOMIC_XCHG = _OPC(6, 18),
210 OPC_ATOMIC_INC = _OPC(6, 19),
211 OPC_ATOMIC_DEC = _OPC(6, 20),
212 OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
213 OPC_ATOMIC_MIN = _OPC(6, 22),
214 OPC_ATOMIC_MAX = _OPC(6, 23),
215 OPC_ATOMIC_AND = _OPC(6, 24),
216 OPC_ATOMIC_OR = _OPC(6, 25),
217 OPC_ATOMIC_XOR = _OPC(6, 26),
218 OPC_LDGB = _OPC(6, 27),
219 OPC_STGB = _OPC(6, 28),
220 OPC_STIB = _OPC(6, 29),
221 OPC_LDC = _OPC(6, 30),
222 OPC_LDLV = _OPC(6, 31),
223
224 /* category 7: */
225 OPC_BAR = _OPC(7, 0),
226 OPC_FENCE = _OPC(7, 1),
227
228 /* meta instructions (category -1): */
229 /* placeholder instr to mark shader inputs: */
230 OPC_META_INPUT = _OPC(-1, 0),
231 /* The "collect" and "split" instructions are used for keeping
232 * track of instructions that write to multiple dst registers
233 * (split) like texture sample instructions, or read multiple
234 * consecutive scalar registers (collect) (bary.f, texture samp)
235 *
236 * A "split" extracts a scalar component from a vecN, and a
237 * "collect" gathers multiple scalar components into a vecN
238 */
239 OPC_META_SPLIT = _OPC(-1, 2),
240 OPC_META_COLLECT = _OPC(-1, 3),
241
242 /* placeholder for texture fetches that run before FS invocation
243 * starts:
244 */
245 OPC_META_TEX_PREFETCH = _OPC(-1, 4),
246
247 } opc_t;
248
/* Split an opc_t back into its two parts: opc_cat() yields the bits above
 * the low NOPC_BITS (as an int), opc_op() yields the low NOPC_BITS bits
 * (as an unsigned).
 */
#define opc_cat(opc)   ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)    ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
251
/* Data type codes; the helpers type_size()/type_float()/type_uint()/
 * type_sint() classify them by width and signedness.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX I assume? */
} type_t;
262
263 static inline uint32_t type_size(type_t type)
264 {
265 switch (type) {
266 case TYPE_F32:
267 case TYPE_U32:
268 case TYPE_S32:
269 return 32;
270 case TYPE_F16:
271 case TYPE_U16:
272 case TYPE_S16:
273 return 16;
274 case TYPE_U8:
275 case TYPE_S8:
276 return 8;
277 default:
278 assert(0); /* invalid type */
279 return 0;
280 }
281 }
282
283 static inline int type_float(type_t type)
284 {
285 return (type == TYPE_F32) || (type == TYPE_F16);
286 }
287
288 static inline int type_uint(type_t type)
289 {
290 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
291 }
292
293 static inline int type_sint(type_t type)
294 {
295 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
296 }
297
298 typedef union PACKED {
299 /* normal gpr or const src register: */
300 struct PACKED {
301 uint32_t comp : 2;
302 uint32_t num : 10;
303 };
304 /* for immediate val: */
305 int32_t iim_val : 11;
306 /* to make compiler happy: */
307 uint32_t dummy32;
308 uint32_t dummy10 : 10;
309 int32_t idummy10 : 10;
310 uint32_t dummy11 : 11;
311 uint32_t dummy12 : 12;
312 uint32_t dummy13 : 13;
313 uint32_t dummy8 : 8;
314 int32_t idummy13 : 13;
315 int32_t idummy8 : 8;
316 } reg_t;
317
/* Register numbers (reg_t::num) with a special meaning: */
#define REG_A0   61   /* address register */
#define REG_P0   62   /* predicate register */
321
322 static inline int reg_special(reg_t reg)
323 {
324 return (reg.num == REG_A0) || (reg.num == REG_P0);
325 }
326
/* Branch types, as stored in instr_cat0_t::brtype. */
typedef enum {
	BRANCH_PLAIN = 0,  /* br */
	BRANCH_OR    = 1,  /* brao */
	BRANCH_AND   = 2,  /* braa */
	BRANCH_CONST = 3,  /* brac */
	BRANCH_ANY   = 4,  /* bany */
	BRANCH_ALL   = 5,  /* ball */
	BRANCH_X     = 6,  /* brax ??? */
} brtype_t;
336
337 typedef struct PACKED {
338 /* dword0: */
339 union PACKED {
340 struct PACKED {
341 int16_t immed : 16;
342 uint32_t dummy1 : 16;
343 } a3xx;
344 struct PACKED {
345 int32_t immed : 20;
346 uint32_t dummy1 : 12;
347 } a4xx;
348 struct PACKED {
349 int32_t immed : 32;
350 } a5xx;
351 };
352
353 /* dword1: */
354 uint32_t idx : 5; /* brac.N index */
355 uint32_t brtype : 3; /* branch type, see brtype_t */
356 uint32_t repeat : 3;
357 uint32_t dummy3 : 1;
358 uint32_t ss : 1;
359 uint32_t inv1 : 1;
360 uint32_t comp1 : 2;
361 uint32_t eq : 1;
362 uint32_t opc_hi : 1; /* at least one bit */
363 uint32_t dummy4 : 2;
364 uint32_t inv0 : 1;
365 uint32_t comp0 : 2; /* component for first src */
366 uint32_t opc : 4;
367 uint32_t jmp_tgt : 1;
368 uint32_t sync : 1;
369 uint32_t opc_cat : 3;
370 } instr_cat0_t;
371
372 typedef struct PACKED {
373 /* dword0: */
374 union PACKED {
375 /* for normal src register: */
376 struct PACKED {
377 uint32_t src : 11;
378 /* at least low bit of pad must be zero or it will
379 * look like a address relative src
380 */
381 uint32_t pad : 21;
382 };
383 /* for address relative: */
384 struct PACKED {
385 int32_t off : 10;
386 uint32_t src_rel_c : 1;
387 uint32_t src_rel : 1;
388 uint32_t unknown : 20;
389 };
390 /* for immediate: */
391 int32_t iim_val;
392 uint32_t uim_val;
393 float fim_val;
394 };
395
396 /* dword1: */
397 uint32_t dst : 8;
398 uint32_t repeat : 3;
399 uint32_t src_r : 1;
400 uint32_t ss : 1;
401 uint32_t ul : 1;
402 uint32_t dst_type : 3;
403 uint32_t dst_rel : 1;
404 uint32_t src_type : 3;
405 uint32_t src_c : 1;
406 uint32_t src_im : 1;
407 uint32_t even : 1;
408 uint32_t pos_inf : 1;
409 uint32_t must_be_0 : 2;
410 uint32_t jmp_tgt : 1;
411 uint32_t sync : 1;
412 uint32_t opc_cat : 3;
413 } instr_cat1_t;
414
415 typedef struct PACKED {
416 /* dword0: */
417 union PACKED {
418 struct PACKED {
419 uint32_t src1 : 11;
420 uint32_t must_be_zero1: 2;
421 uint32_t src1_im : 1; /* immediate */
422 uint32_t src1_neg : 1; /* negate */
423 uint32_t src1_abs : 1; /* absolute value */
424 };
425 struct PACKED {
426 uint32_t src1 : 10;
427 uint32_t src1_c : 1; /* relative-const */
428 uint32_t src1_rel : 1; /* relative address */
429 uint32_t must_be_zero : 1;
430 uint32_t dummy : 3;
431 } rel1;
432 struct PACKED {
433 uint32_t src1 : 12;
434 uint32_t src1_c : 1; /* const */
435 uint32_t dummy : 3;
436 } c1;
437 };
438
439 union PACKED {
440 struct PACKED {
441 uint32_t src2 : 11;
442 uint32_t must_be_zero2: 2;
443 uint32_t src2_im : 1; /* immediate */
444 uint32_t src2_neg : 1; /* negate */
445 uint32_t src2_abs : 1; /* absolute value */
446 };
447 struct PACKED {
448 uint32_t src2 : 10;
449 uint32_t src2_c : 1; /* relative-const */
450 uint32_t src2_rel : 1; /* relative address */
451 uint32_t must_be_zero : 1;
452 uint32_t dummy : 3;
453 } rel2;
454 struct PACKED {
455 uint32_t src2 : 12;
456 uint32_t src2_c : 1; /* const */
457 uint32_t dummy : 3;
458 } c2;
459 };
460
461 /* dword1: */
462 uint32_t dst : 8;
463 uint32_t repeat : 2;
464 uint32_t sat : 1;
465 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
466 uint32_t ss : 1;
467 uint32_t ul : 1; /* dunno */
468 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
469 uint32_t ei : 1;
470 uint32_t cond : 3;
471 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
472 uint32_t full : 1; /* not half */
473 uint32_t opc : 6;
474 uint32_t jmp_tgt : 1;
475 uint32_t sync : 1;
476 uint32_t opc_cat : 3;
477 } instr_cat2_t;
478
479 typedef struct PACKED {
480 /* dword0: */
481 union PACKED {
482 struct PACKED {
483 uint32_t src1 : 11;
484 uint32_t must_be_zero1: 2;
485 uint32_t src2_c : 1;
486 uint32_t src1_neg : 1;
487 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
488 };
489 struct PACKED {
490 uint32_t src1 : 10;
491 uint32_t src1_c : 1;
492 uint32_t src1_rel : 1;
493 uint32_t must_be_zero : 1;
494 uint32_t dummy : 3;
495 } rel1;
496 struct PACKED {
497 uint32_t src1 : 12;
498 uint32_t src1_c : 1;
499 uint32_t dummy : 3;
500 } c1;
501 };
502
503 union PACKED {
504 struct PACKED {
505 uint32_t src3 : 11;
506 uint32_t must_be_zero2: 2;
507 uint32_t src3_r : 1;
508 uint32_t src2_neg : 1;
509 uint32_t src3_neg : 1;
510 };
511 struct PACKED {
512 uint32_t src3 : 10;
513 uint32_t src3_c : 1;
514 uint32_t src3_rel : 1;
515 uint32_t must_be_zero : 1;
516 uint32_t dummy : 3;
517 } rel2;
518 struct PACKED {
519 uint32_t src3 : 12;
520 uint32_t src3_c : 1;
521 uint32_t dummy : 3;
522 } c2;
523 };
524
525 /* dword1: */
526 uint32_t dst : 8;
527 uint32_t repeat : 2;
528 uint32_t sat : 1;
529 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
530 uint32_t ss : 1;
531 uint32_t ul : 1;
532 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
533 uint32_t src2 : 8;
534 uint32_t opc : 4;
535 uint32_t jmp_tgt : 1;
536 uint32_t sync : 1;
537 uint32_t opc_cat : 3;
538 } instr_cat3_t;
539
540 static inline bool instr_cat3_full(instr_cat3_t *cat3)
541 {
542 switch (_OPC(3, cat3->opc)) {
543 case OPC_MAD_F16:
544 case OPC_MAD_U16:
545 case OPC_MAD_S16:
546 case OPC_SEL_B16:
547 case OPC_SEL_S16:
548 case OPC_SEL_F16:
549 case OPC_SAD_S16:
550 case OPC_SAD_S32: // really??
551 return false;
552 default:
553 return true;
554 }
555 }
556
557 typedef struct PACKED {
558 /* dword0: */
559 union PACKED {
560 struct PACKED {
561 uint32_t src : 11;
562 uint32_t must_be_zero1: 2;
563 uint32_t src_im : 1; /* immediate */
564 uint32_t src_neg : 1; /* negate */
565 uint32_t src_abs : 1; /* absolute value */
566 };
567 struct PACKED {
568 uint32_t src : 10;
569 uint32_t src_c : 1; /* relative-const */
570 uint32_t src_rel : 1; /* relative address */
571 uint32_t must_be_zero : 1;
572 uint32_t dummy : 3;
573 } rel;
574 struct PACKED {
575 uint32_t src : 12;
576 uint32_t src_c : 1; /* const */
577 uint32_t dummy : 3;
578 } c;
579 };
580 uint32_t dummy1 : 16; /* seem to be ignored */
581
582 /* dword1: */
583 uint32_t dst : 8;
584 uint32_t repeat : 2;
585 uint32_t sat : 1;
586 uint32_t src_r : 1;
587 uint32_t ss : 1;
588 uint32_t ul : 1;
589 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
590 uint32_t dummy2 : 5; /* seem to be ignored */
591 uint32_t full : 1; /* not half */
592 uint32_t opc : 6;
593 uint32_t jmp_tgt : 1;
594 uint32_t sync : 1;
595 uint32_t opc_cat : 3;
596 } instr_cat4_t;
597
/* When is_s2en_bindless = 1, this mode selects whether bindless is enabled
 * and, if it is, how the (base, index) pair is obtained for both the
 * sampler and the texture.  The instruction embeds a single base, which is
 * always the one used for the texture.
 */
typedef enum {
	/* Traditional GL binding model: texture and sampler index come from
	 * src3, which is not presumed to be uniform.  Backwards-compatible
	 * with earlier generations, where this field was always 0 and
	 * nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is *not* presumed to be
	 * uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Traditional GL binding model: texture and sampler index come from
	 * src3, which is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* Texture and sampler share the same base; the sampler index is the
	 * low 4 bits of src3 and the texture index is the high 4 bits.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* Sampler base comes from the low 3 bits of a1.x and the texture
	 * index from the next 8 bits of a1.x.  The sampler index is an
	 * immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
648
649 typedef struct PACKED {
650 /* dword0: */
651 union PACKED {
652 /* normal case: */
653 struct PACKED {
654 uint32_t full : 1; /* not half */
655 uint32_t src1 : 8;
656 uint32_t src2 : 8;
657 uint32_t dummy1 : 4; /* seem to be ignored */
658 uint32_t samp : 4;
659 uint32_t tex : 7;
660 } norm;
661 /* s2en case: */
662 struct PACKED {
663 uint32_t full : 1; /* not half */
664 uint32_t src1 : 8;
665 uint32_t src2 : 8;
666 uint32_t dummy1 : 2;
667 uint32_t base_hi : 2;
668 uint32_t src3 : 8;
669 uint32_t desc_mode : 3;
670 } s2en_bindless;
671 /* same in either case: */
672 // XXX I think, confirm this
673 struct PACKED {
674 uint32_t full : 1; /* not half */
675 uint32_t src1 : 8;
676 uint32_t src2 : 8;
677 uint32_t pad : 15;
678 };
679 };
680
681 /* dword1: */
682 uint32_t dst : 8;
683 uint32_t wrmask : 4; /* write-mask */
684 uint32_t type : 3;
685 uint32_t base_lo : 1; /* used with bindless */
686 uint32_t is_3d : 1;
687
688 uint32_t is_a : 1;
689 uint32_t is_s : 1;
690 uint32_t is_s2en_bindless : 1;
691 uint32_t is_o : 1;
692 uint32_t is_p : 1;
693
694 uint32_t opc : 5;
695 uint32_t jmp_tgt : 1;
696 uint32_t sync : 1;
697 uint32_t opc_cat : 3;
698 } instr_cat5_t;
699
/* cat6 view "a" -- dword0 layout when src_off is set: [src1 + off], src2.
 * dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
713
/* cat6 view "b" -- dword0 layout when src_off is clear: [src1], src2.
 * dword1 is left opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t pad      : 5;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
728
/* cat6 view "c" -- dword1 layout when dst_off is set.  The view also
 * exposes off_high, a 5-bit signed field located in dword0.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dw0_pad1 : 9;
	int32_t  off_high : 5;
	uint32_t dw0_pad2 : 18;

	/* dword1: */
	uint32_t off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
741
/* cat6 view "d" -- dword1 layout when dst_off is clear.  dword0 is left
 * opaque here.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
752
/* cat6 view used for ldgb and the atomics:
 *
 *   ldgb:       pad0=0, pad3=1
 *   atomic .g:  pad0=1, pad3=1
 *          .l:  pad0=1, pad3=0
 *
 * NOTE(review): this struct has no field named pad3; the table above
 * presumably predates a field rename -- confirm which bit it refers to.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0        : 1;
	uint32_t src3        : 8;
	uint32_t d           : 2;
	uint32_t typed       : 1;
	uint32_t type_size   : 2;
	uint32_t src1        : 8;
	uint32_t src1_im     : 1;
	uint32_t src2_im     : 1;
	uint32_t src2        : 8;

	/* dword1: */
	uint32_t dst         : 8;
	uint32_t mustbe0     : 1;
	uint32_t src_ssbo    : 8;
	uint32_t pad2        : 3;   /* type */
	uint32_t g           : 1;
	uint32_t src_ssbo_im : 1;
	uint32_t pad4        : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6ldgb_t;
780
/* cat6 view used for stgb: pad0=0, pad3=2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1    : 1;   /* ??? */
	uint32_t src1       : 8;
	uint32_t d          : 2;
	uint32_t typed      : 1;
	uint32_t type_size  : 2;
	uint32_t pad0       : 9;
	uint32_t src2_im    : 1;
	uint32_t src2       : 8;

	/* dword1: */
	uint32_t src3       : 8;
	uint32_t src3_im    : 1;
	uint32_t dst_ssbo   : 8;
	uint32_t pad2       : 3;   /* type */
	uint32_t pad3       : 2;
	uint32_t pad4       : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6stgb_t;
802
803 typedef union PACKED {
804 instr_cat6a_t a;
805 instr_cat6b_t b;
806 instr_cat6c_t c;
807 instr_cat6d_t d;
808 instr_cat6ldgb_t ldgb;
809 instr_cat6stgb_t stgb;
810 struct PACKED {
811 /* dword0: */
812 uint32_t src_off : 1;
813 uint32_t pad1 : 31;
814
815 /* dword1: */
816 uint32_t pad2 : 8;
817 uint32_t dst_off : 1;
818 uint32_t pad3 : 8;
819 uint32_t type : 3;
820 uint32_t g : 1; /* or in some cases it means dst immed */
821 uint32_t pad4 : 1;
822 uint32_t opc : 5;
823 uint32_t jmp_tgt : 1;
824 uint32_t sync : 1;
825 uint32_t opc_cat : 3;
826 };
827 } instr_cat6_t;
828
/* Like cat5_desc_mode_t: describes how the descriptor is loaded. */
typedef enum {
	/* Old GL binding model with an immediate index. */
	CAT6_IMM = 0,

	CAT6_UNIFORM = 1,

	CAT6_NONUNIFORM = 2,

	/* Bindless model with an immediate index. */
	CAT6_BINDLESS_IMM = 4,

	/* Bindless model with a uniform register index. */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Bindless model with a register index that isn't guaranteed to be
	 * uniform.  This presumably checks if the indices are equal and
	 * splits up the load/store, because it works the way you would
	 * expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
854
/**
 * a6xx-style cat6 encoding.
 *
 * Atomic ops (which return a value):
 *    pad1=1, pad3=c, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually the dest register
 *    src2.y  - is 'data', except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * stib (which does not return a value):
 *    pad1=0, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * ldib:
 *    pad1=1, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *
 * ldc (load from UBO using descriptor):
 *    pad1=0, pad3=8, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 * NOTE(review): that contradicts the pad5=2/3 values listed above;
 * presumably "pad2 and pad4" was meant -- confirm.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1       : 1;
	uint32_t base       : 3;
	uint32_t pad2       : 2;
	uint32_t desc_mode  : 3;
	uint32_t d          : 2;
	uint32_t typed      : 1;
	uint32_t type_size  : 2;
	uint32_t opc        : 5;
	uint32_t pad3       : 5;
	uint32_t src1       : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2       : 8;  /* or the dst for load instructions */
	uint32_t pad4       : 1;  /* mustbe0 ?? */
	uint32_t ssbo       : 8;  /* ssbo/image binding point */
	uint32_t type       : 3;
	uint32_t pad5       : 7;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat6_a6xx_t;
901
/* Category 7 instruction encoding (two dwords); dword0 is entirely pad. */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
919
920 typedef union PACKED {
921 instr_cat0_t cat0;
922 instr_cat1_t cat1;
923 instr_cat2_t cat2;
924 instr_cat3_t cat3;
925 instr_cat4_t cat4;
926 instr_cat5_t cat5;
927 instr_cat6_t cat6;
928 instr_cat6_a6xx_t cat6_a6xx;
929 instr_cat7_t cat7;
930 struct PACKED {
931 /* dword0: */
932 uint32_t pad1 : 32;
933
934 /* dword1: */
935 uint32_t pad2 : 12;
936 uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
937 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
938 uint32_t pad3 : 13;
939 uint32_t jmp_tgt : 1;
940 uint32_t sync : 1;
941 uint32_t opc_cat : 3;
942
943 };
944 } instr_t;
945
946 static inline uint32_t instr_repeat(instr_t *instr)
947 {
948 switch (instr->opc_cat) {
949 case 0: return instr->cat0.repeat;
950 case 1: return instr->cat1.repeat;
951 case 2: return instr->cat2.repeat;
952 case 3: return instr->cat3.repeat;
953 case 4: return instr->cat4.repeat;
954 default: return 0;
955 }
956 }
957
958 static inline bool instr_sat(instr_t *instr)
959 {
960 switch (instr->opc_cat) {
961 case 2: return instr->cat2.sat;
962 case 3: return instr->cat3.sat;
963 case 4: return instr->cat4.sat;
964 default: return false;
965 }
966 }
967
968 /* We can probably drop the gpu_id arg, but keeping it for now so we can
969 * assert if we see something we think should be new encoding on an older
970 * gpu.
971 */
972 static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
973 {
974 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
975
976 /* At least one of these two bits is pad in all the possible
977 * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
978 * cmdstream traces I have indicates that the pad bit is zero
979 * in all cases. So we can use this to detect new encoding:
980 */
981 if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
982 assert(gpu_id >= 600);
983 assert(instr->cat6.opc == 0);
984 return false;
985 }
986
987 return true;
988 }
989
990 static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
991 {
992 switch (instr->opc_cat) {
993 case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4;
994 case 1: return 0;
995 case 2: return instr->cat2.opc;
996 case 3: return instr->cat3.opc;
997 case 4: return instr->cat4.opc;
998 case 5: return instr->cat5.opc;
999 case 6:
1000 if (!is_cat6_legacy(instr, gpu_id))
1001 return instr->cat6_a6xx.opc;
1002 return instr->cat6.opc;
1003 case 7: return instr->cat7.opc;
1004 default: return 0;
1005 }
1006 }
1007
1008 static inline bool is_mad(opc_t opc)
1009 {
1010 switch (opc) {
1011 case OPC_MAD_U16:
1012 case OPC_MAD_S16:
1013 case OPC_MAD_U24:
1014 case OPC_MAD_S24:
1015 case OPC_MAD_F16:
1016 case OPC_MAD_F32:
1017 return true;
1018 default:
1019 return false;
1020 }
1021 }
1022
1023 static inline bool is_madsh(opc_t opc)
1024 {
1025 switch (opc) {
1026 case OPC_MADSH_U16:
1027 case OPC_MADSH_M16:
1028 return true;
1029 default:
1030 return false;
1031 }
1032 }
1033
1034 static inline bool is_atomic(opc_t opc)
1035 {
1036 switch (opc) {
1037 case OPC_ATOMIC_ADD:
1038 case OPC_ATOMIC_SUB:
1039 case OPC_ATOMIC_XCHG:
1040 case OPC_ATOMIC_INC:
1041 case OPC_ATOMIC_DEC:
1042 case OPC_ATOMIC_CMPXCHG:
1043 case OPC_ATOMIC_MIN:
1044 case OPC_ATOMIC_MAX:
1045 case OPC_ATOMIC_AND:
1046 case OPC_ATOMIC_OR:
1047 case OPC_ATOMIC_XOR:
1048 return true;
1049 default:
1050 return false;
1051 }
1052 }
1053
1054 static inline bool is_ssbo(opc_t opc)
1055 {
1056 switch (opc) {
1057 case OPC_RESFMT:
1058 case OPC_RESINFO:
1059 case OPC_LDGB:
1060 case OPC_STGB:
1061 case OPC_STIB:
1062 return true;
1063 default:
1064 return false;
1065 }
1066 }
1067
1068 static inline bool is_isam(opc_t opc)
1069 {
1070 switch (opc) {
1071 case OPC_ISAM:
1072 case OPC_ISAML:
1073 case OPC_ISAMM:
1074 return true;
1075 default:
1076 return false;
1077 }
1078 }
1079
1080
1081 static inline bool is_cat2_float(opc_t opc)
1082 {
1083 switch (opc) {
1084 case OPC_ADD_F:
1085 case OPC_MIN_F:
1086 case OPC_MAX_F:
1087 case OPC_MUL_F:
1088 case OPC_SIGN_F:
1089 case OPC_CMPS_F:
1090 case OPC_ABSNEG_F:
1091 case OPC_CMPV_F:
1092 case OPC_FLOOR_F:
1093 case OPC_CEIL_F:
1094 case OPC_RNDNE_F:
1095 case OPC_RNDAZ_F:
1096 case OPC_TRUNC_F:
1097 return true;
1098
1099 default:
1100 return false;
1101 }
1102 }
1103
1104 static inline bool is_cat3_float(opc_t opc)
1105 {
1106 switch (opc) {
1107 case OPC_MAD_F16:
1108 case OPC_MAD_F32:
1109 case OPC_SEL_F16:
1110 case OPC_SEL_F32:
1111 return true;
1112 default:
1113 return false;
1114 }
1115 }
1116
/* Declaration only; the definition is not part of this header.
 * NOTE(review): presumably disassembles `sizedwords` dwords starting at
 * `dwords` and prints to `out` -- confirm the meaning of `level` and of
 * the return value against the implementation.
 */
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level,
		FILE *out, unsigned gpu_id);
1118
1119 #endif /* INSTR_A3XX_H_ */