freedreno/ir3: add meta instruction for pre-fs texture fetch
[mesa.git] / src / freedreno / ir3 / instr-a3xx.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
27 #define PACKED __attribute__((__packed__)) /* GCC-style packed attribute; applied to the encoding structs/unions declared below */
28
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdbool.h>
32 #include <assert.h>
33
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Build an opc_t value: the instruction category goes in the bits above
 * NOPC_BITS, the per-category opcode in the low NOPC_BITS bits.
 *
 * The category is scaled by multiplication rather than '<<' because the
 * meta instructions use category -1, and left-shifting a negative value
 * is undefined behaviour in C.  For non-negative categories the result
 * is identical to a shift.  Both arguments are parenthesized so that
 * expression arguments expand safely.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
38
39 typedef enum { /* opcode ids built with _OPC(category, opcode-within-category) */
40 /* category 0: */
41 OPC_NOP = _OPC(0, 0),
42 OPC_BR = _OPC(0, 1),
43 OPC_JUMP = _OPC(0, 2),
44 OPC_CALL = _OPC(0, 3),
45 OPC_RET = _OPC(0, 4),
46 OPC_KILL = _OPC(0, 5),
47 OPC_END = _OPC(0, 6),
48 OPC_EMIT = _OPC(0, 7),
49 OPC_CUT = _OPC(0, 8),
50 OPC_CHMASK = _OPC(0, 9),
51 OPC_CHSH = _OPC(0, 10),
52 OPC_FLOW_REV = _OPC(0, 11),
53
54 /* category 1: (a single opcode) */
55 OPC_MOV = _OPC(1, 0),
56
57 /* category 2: */
58 OPC_ADD_F = _OPC(2, 0),
59 OPC_MIN_F = _OPC(2, 1),
60 OPC_MAX_F = _OPC(2, 2),
61 OPC_MUL_F = _OPC(2, 3),
62 OPC_SIGN_F = _OPC(2, 4),
63 OPC_CMPS_F = _OPC(2, 5),
64 OPC_ABSNEG_F = _OPC(2, 6),
65 OPC_CMPV_F = _OPC(2, 7),
66 /* 8 - invalid */
67 OPC_FLOOR_F = _OPC(2, 9),
68 OPC_CEIL_F = _OPC(2, 10),
69 OPC_RNDNE_F = _OPC(2, 11),
70 OPC_RNDAZ_F = _OPC(2, 12),
71 OPC_TRUNC_F = _OPC(2, 13),
72 /* 14-15 - invalid */
73 OPC_ADD_U = _OPC(2, 16),
74 OPC_ADD_S = _OPC(2, 17),
75 OPC_SUB_U = _OPC(2, 18),
76 OPC_SUB_S = _OPC(2, 19),
77 OPC_CMPS_U = _OPC(2, 20),
78 OPC_CMPS_S = _OPC(2, 21),
79 OPC_MIN_U = _OPC(2, 22),
80 OPC_MIN_S = _OPC(2, 23),
81 OPC_MAX_U = _OPC(2, 24),
82 OPC_MAX_S = _OPC(2, 25),
83 OPC_ABSNEG_S = _OPC(2, 26),
84 /* 27 - invalid */
85 OPC_AND_B = _OPC(2, 28),
86 OPC_OR_B = _OPC(2, 29),
87 OPC_NOT_B = _OPC(2, 30),
88 OPC_XOR_B = _OPC(2, 31),
89 /* 32 - invalid */
90 OPC_CMPV_U = _OPC(2, 33),
91 OPC_CMPV_S = _OPC(2, 34),
92 /* 35-47 - invalid */
93 OPC_MUL_U = _OPC(2, 48),
94 OPC_MUL_S = _OPC(2, 49),
95 OPC_MULL_U = _OPC(2, 50),
96 OPC_BFREV_B = _OPC(2, 51),
97 OPC_CLZ_S = _OPC(2, 52),
98 OPC_CLZ_B = _OPC(2, 53),
99 OPC_SHL_B = _OPC(2, 54),
100 OPC_SHR_B = _OPC(2, 55),
101 OPC_ASHR_B = _OPC(2, 56),
102 OPC_BARY_F = _OPC(2, 57),
103 OPC_MGEN_B = _OPC(2, 58),
104 OPC_GETBIT_B = _OPC(2, 59),
105 OPC_SETRM = _OPC(2, 60),
106 OPC_CBITS_B = _OPC(2, 61),
107 OPC_SHB = _OPC(2, 62),
108 OPC_MSAD = _OPC(2, 63),
109
110 /* category 3: */
111 OPC_MAD_U16 = _OPC(3, 0),
112 OPC_MADSH_U16 = _OPC(3, 1),
113 OPC_MAD_S16 = _OPC(3, 2),
114 OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
115 OPC_MAD_U24 = _OPC(3, 4),
116 OPC_MAD_S24 = _OPC(3, 5),
117 OPC_MAD_F16 = _OPC(3, 6),
118 OPC_MAD_F32 = _OPC(3, 7),
119 OPC_SEL_B16 = _OPC(3, 8),
120 OPC_SEL_B32 = _OPC(3, 9),
121 OPC_SEL_S16 = _OPC(3, 10),
122 OPC_SEL_S32 = _OPC(3, 11),
123 OPC_SEL_F16 = _OPC(3, 12),
124 OPC_SEL_F32 = _OPC(3, 13),
125 OPC_SAD_S16 = _OPC(3, 14),
126 OPC_SAD_S32 = _OPC(3, 15),
127
128 /* category 4: */
129 OPC_RCP = _OPC(4, 0),
130 OPC_RSQ = _OPC(4, 1),
131 OPC_LOG2 = _OPC(4, 2),
132 OPC_EXP2 = _OPC(4, 3),
133 OPC_SIN = _OPC(4, 4),
134 OPC_COS = _OPC(4, 5),
135 OPC_SQRT = _OPC(4, 6),
136 // 7-63 - invalid
137
138 /* category 5: */
139 OPC_ISAM = _OPC(5, 0),
140 OPC_ISAML = _OPC(5, 1),
141 OPC_ISAMM = _OPC(5, 2),
142 OPC_SAM = _OPC(5, 3),
143 OPC_SAMB = _OPC(5, 4),
144 OPC_SAML = _OPC(5, 5),
145 OPC_SAMGQ = _OPC(5, 6),
146 OPC_GETLOD = _OPC(5, 7),
147 OPC_CONV = _OPC(5, 8),
148 OPC_CONVM = _OPC(5, 9),
149 OPC_GETSIZE = _OPC(5, 10),
150 OPC_GETBUF = _OPC(5, 11),
151 OPC_GETPOS = _OPC(5, 12),
152 OPC_GETINFO = _OPC(5, 13),
153 OPC_DSX = _OPC(5, 14),
154 OPC_DSY = _OPC(5, 15),
155 OPC_GATHER4R = _OPC(5, 16),
156 OPC_GATHER4G = _OPC(5, 17),
157 OPC_GATHER4B = _OPC(5, 18),
158 OPC_GATHER4A = _OPC(5, 19),
159 OPC_SAMGP0 = _OPC(5, 20),
160 OPC_SAMGP1 = _OPC(5, 21),
161 OPC_SAMGP2 = _OPC(5, 22),
162 OPC_SAMGP3 = _OPC(5, 23),
163 OPC_DSXPP_1 = _OPC(5, 24),
164 OPC_DSYPP_1 = _OPC(5, 25),
165 OPC_RGETPOS = _OPC(5, 26),
166 OPC_RGETINFO = _OPC(5, 27),
167
168 /* category 6: (12-13 are not assigned here) */
169 OPC_LDG = _OPC(6, 0), /* load-global */
170 OPC_LDL = _OPC(6, 1),
171 OPC_LDP = _OPC(6, 2),
172 OPC_STG = _OPC(6, 3), /* store-global */
173 OPC_STL = _OPC(6, 4),
174 OPC_STP = _OPC(6, 5),
175 OPC_LDIB = _OPC(6, 6),
176 OPC_G2L = _OPC(6, 7),
177 OPC_L2G = _OPC(6, 8),
178 OPC_PREFETCH = _OPC(6, 9),
179 OPC_LDLW = _OPC(6, 10),
180 OPC_STLW = _OPC(6, 11),
181 OPC_RESFMT = _OPC(6, 14),
182 OPC_RESINFO = _OPC(6, 15),
183 OPC_ATOMIC_ADD = _OPC(6, 16),
184 OPC_ATOMIC_SUB = _OPC(6, 17),
185 OPC_ATOMIC_XCHG = _OPC(6, 18),
186 OPC_ATOMIC_INC = _OPC(6, 19),
187 OPC_ATOMIC_DEC = _OPC(6, 20),
188 OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
189 OPC_ATOMIC_MIN = _OPC(6, 22),
190 OPC_ATOMIC_MAX = _OPC(6, 23),
191 OPC_ATOMIC_AND = _OPC(6, 24),
192 OPC_ATOMIC_OR = _OPC(6, 25),
193 OPC_ATOMIC_XOR = _OPC(6, 26),
194 OPC_LDGB = _OPC(6, 27),
195 OPC_STGB = _OPC(6, 28),
196 OPC_STIB = _OPC(6, 29),
197 OPC_LDC = _OPC(6, 30),
198 OPC_LDLV = _OPC(6, 31),
199
200 /* category 7: */
201 OPC_BAR = _OPC(7, 0),
202 OPC_FENCE = _OPC(7, 1),
203
204 /* meta instructions (category -1): */
205 /* placeholder instr to mark shader inputs: */
206 OPC_META_INPUT = _OPC(-1, 0),
207 /* The "fan-in" and "fan-out" instructions are used for keeping
208 * track of instructions that write to multiple dst registers
209 * (fan-out) like texture sample instructions, or read multiple
210 * consecutive scalar registers (fan-in) (bary.f, texture samp)
211 */
212 OPC_META_FO = _OPC(-1, 2),
213 OPC_META_FI = _OPC(-1, 3),
214
215 /* placeholder for texture fetches that run before FS invocation
216 * starts:
217 */
218 OPC_META_TEX_PREFETCH = _OPC(-1, 4),
219
220 } opc_t;
221
222 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) /* category part of an opc_t value: the bits above the low NOPC_BITS */
223 #define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) /* opcode within its category: the low NOPC_BITS bits */
224
225 typedef enum { /* data type ids with explicit values 0..7; type_size() gives the bit width of each */
226 TYPE_F16 = 0,
227 TYPE_F32 = 1,
228 TYPE_U16 = 2,
229 TYPE_U32 = 3,
230 TYPE_S16 = 4,
231 TYPE_S32 = 5,
232 TYPE_U8 = 6,
233 TYPE_S8 = 7, // XXX I assume?
234 } type_t;
235
236 static inline uint32_t type_size(type_t type)
237 {
238 switch (type) {
239 case TYPE_F32:
240 case TYPE_U32:
241 case TYPE_S32:
242 return 32;
243 case TYPE_F16:
244 case TYPE_U16:
245 case TYPE_S16:
246 return 16;
247 case TYPE_U8:
248 case TYPE_S8:
249 return 8;
250 default:
251 assert(0); /* invalid type */
252 return 0;
253 }
254 }
255
256 static inline int type_float(type_t type)
257 {
258 return (type == TYPE_F32) || (type == TYPE_F16);
259 }
260
261 static inline int type_uint(type_t type)
262 {
263 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
264 }
265
266 static inline int type_sint(type_t type)
267 {
268 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
269 }
270
271 typedef union PACKED { /* register operand: all members below are overlapping views of the same storage */
272 /* normal gpr or const src register: */
273 struct PACKED {
274 uint32_t comp : 2; /* presumably the component (x/y/z/w) within the register -- TODO confirm */
275 uint32_t num : 10; /* register number; compared against REG_A0/REG_P0 by reg_special() */
276 };
277 /* for immediate val: */
278 int32_t iim_val : 11; /* signed 11-bit field */
279 /* to make compiler happy: */
280 uint32_t dummy32; /* full 32-bit view; the widest member of the union */
281 uint32_t dummy10 : 10;
282 int32_t idummy10 : 10;
283 uint32_t dummy11 : 11;
284 uint32_t dummy12 : 12;
285 uint32_t dummy13 : 13;
286 uint32_t dummy8 : 8;
287 } reg_t;
288
289 /* special registers (values of reg_t.num that reg_special() recognizes): */
290 #define REG_A0 61 /* address register */
291 #define REG_P0 62 /* predicate register */
292
293 static inline int reg_special(reg_t reg)
294 {
295 return (reg.num == REG_A0) || (reg.num == REG_P0);
296 }
297
298 typedef struct PACKED { /* category 0 instruction encoding: two 32-bit dwords */
299 /* dword0: */
300 union PACKED { /* immediate field; its width differs per generation (16/20/32 bits) */
301 struct PACKED {
302 int16_t immed : 16;
303 uint32_t dummy1 : 16;
304 } a3xx;
305 struct PACKED {
306 int32_t immed : 20;
307 uint32_t dummy1 : 12;
308 } a4xx;
309 struct PACKED {
310 int32_t immed : 32;
311 } a5xx;
312 };
313
314 /* dword1: (field widths below sum to 32 bits) */
315 uint32_t dummy2 : 8;
316 uint32_t repeat : 3; /* returned by instr_repeat() for category 0 */
317 uint32_t dummy3 : 1;
318 uint32_t ss : 1;
319 uint32_t dummy4 : 7;
320 uint32_t inv : 1;
321 uint32_t comp : 2;
322 uint32_t opc : 4; /* opcode within category 0 */
323 uint32_t jmp_tgt : 1;
324 uint32_t sync : 1;
325 uint32_t opc_cat : 3; /* instruction category; same position as instr_t.opc_cat */
326 } instr_cat0_t;
327
328 typedef struct PACKED { /* category 1 instruction encoding: two 32-bit dwords */
329 /* dword0: (three overlapping views of the source operand) */
330 union PACKED {
331 /* for normal src register: */
332 struct PACKED {
333 uint32_t src : 11;
334 /* at least the low bit of pad must be zero or it will
335 * look like an address-relative src
336 */
337 uint32_t pad : 21;
338 };
339 /* for address relative: */
340 struct PACKED {
341 int32_t off : 10; /* signed offset */
342 uint32_t src_rel_c : 1;
343 uint32_t src_rel : 1; /* overlaps the low bit of 'pad' in the normal view */
344 uint32_t unknown : 20;
345 };
346 /* for immediate: (the same 32 bits read as signed, unsigned or float) */
347 int32_t iim_val;
348 uint32_t uim_val;
349 float fim_val;
350 };
351
352 /* dword1: (field widths below sum to 32 bits) */
353 uint32_t dst : 8;
354 uint32_t repeat : 3; /* returned by instr_repeat() for category 1 */
355 uint32_t src_r : 1;
356 uint32_t ss : 1;
357 uint32_t ul : 1;
358 uint32_t dst_type : 3; /* 3 bits, enough for the type_t values 0..7 -- presumably a type_t; TODO confirm */
359 uint32_t dst_rel : 1;
360 uint32_t src_type : 3; /* 3 bits, enough for the type_t values 0..7 -- presumably a type_t; TODO confirm */
361 uint32_t src_c : 1;
362 uint32_t src_im : 1;
363 uint32_t even : 1;
364 uint32_t pos_inf : 1;
365 uint32_t must_be_0 : 2; /* no opcode field here: instr_opc() returns 0 for category 1 */
366 uint32_t jmp_tgt : 1;
367 uint32_t sync : 1;
368 uint32_t opc_cat : 3;
369 } instr_cat1_t;
370
371 typedef struct PACKED { /* category 2 instruction encoding: two 32-bit dwords */
372 /* dword0: (two 16-bit source descriptors, src1 then src2, each with three overlapping views) */
373 union PACKED {
374 struct PACKED {
375 uint32_t src1 : 11;
376 uint32_t must_be_zero1: 2;
377 uint32_t src1_im : 1; /* immediate */
378 uint32_t src1_neg : 1; /* negate */
379 uint32_t src1_abs : 1; /* absolute value */
380 };
381 struct PACKED {
382 uint32_t src1 : 10;
383 uint32_t src1_c : 1; /* relative-const */
384 uint32_t src1_rel : 1; /* relative address */
385 uint32_t must_be_zero : 1;
386 uint32_t dummy : 3;
387 } rel1;
388 struct PACKED {
389 uint32_t src1 : 12;
390 uint32_t src1_c : 1; /* const */
391 uint32_t dummy : 3;
392 } c1;
393 };
394
395 union PACKED { /* second source: same layout as the first */
396 struct PACKED {
397 uint32_t src2 : 11;
398 uint32_t must_be_zero2: 2;
399 uint32_t src2_im : 1; /* immediate */
400 uint32_t src2_neg : 1; /* negate */
401 uint32_t src2_abs : 1; /* absolute value */
402 };
403 struct PACKED {
404 uint32_t src2 : 10;
405 uint32_t src2_c : 1; /* relative-const */
406 uint32_t src2_rel : 1; /* relative address */
407 uint32_t must_be_zero : 1;
408 uint32_t dummy : 3;
409 } rel2;
410 struct PACKED {
411 uint32_t src2 : 12;
412 uint32_t src2_c : 1; /* const */
413 uint32_t dummy : 3;
414 } c2;
415 };
416
417 /* dword1: (field widths below sum to 32 bits) */
418 uint32_t dst : 8;
419 uint32_t repeat : 2; /* returned by instr_repeat() for category 2 */
420 uint32_t sat : 1; /* returned by instr_sat() for category 2 */
421 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
422 uint32_t ss : 1;
423 uint32_t ul : 1; /* dunno */
424 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
425 uint32_t ei : 1;
426 uint32_t cond : 3;
427 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
428 uint32_t full : 1; /* not half */
429 uint32_t opc : 6; /* opcode within category 2; 6 bits == NOPC_BITS */
430 uint32_t jmp_tgt : 1;
431 uint32_t sync : 1;
432 uint32_t opc_cat : 3;
433 } instr_cat2_t;
434
435 typedef struct PACKED { /* category 3 (three-source) instruction encoding: two 32-bit dwords */
436 /* dword0: (16 bits for src1 plus flags, then 16 bits for src3 plus flags) */
437 union PACKED {
438 struct PACKED {
439 uint32_t src1 : 11;
440 uint32_t must_be_zero1: 2;
441 uint32_t src2_c : 1; /* note: src2 flags live here although src2 itself is in dword1 */
442 uint32_t src1_neg : 1;
443 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
444 };
445 struct PACKED {
446 uint32_t src1 : 10;
447 uint32_t src1_c : 1;
448 uint32_t src1_rel : 1;
449 uint32_t must_be_zero : 1;
450 uint32_t dummy : 3;
451 } rel1;
452 struct PACKED {
453 uint32_t src1 : 12;
454 uint32_t src1_c : 1;
455 uint32_t dummy : 3;
456 } c1;
457 };
458
459 union PACKED {
460 struct PACKED {
461 uint32_t src3 : 11;
462 uint32_t must_be_zero2: 2;
463 uint32_t src3_r : 1;
464 uint32_t src2_neg : 1;
465 uint32_t src3_neg : 1;
466 };
467 struct PACKED {
468 uint32_t src3 : 10;
469 uint32_t src3_c : 1;
470 uint32_t src3_rel : 1;
471 uint32_t must_be_zero : 1;
472 uint32_t dummy : 3;
473 } rel2;
474 struct PACKED {
475 uint32_t src3 : 12;
476 uint32_t src3_c : 1;
477 uint32_t dummy : 3;
478 } c2;
479 };
480
481 /* dword1: (field widths below sum to 32 bits) */
482 uint32_t dst : 8;
483 uint32_t repeat : 2; /* returned by instr_repeat() for category 3 */
484 uint32_t sat : 1; /* returned by instr_sat() for category 3 */
485 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
486 uint32_t ss : 1;
487 uint32_t ul : 1;
488 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
489 uint32_t src2 : 8;
490 uint32_t opc : 4; /* opcode within category 3; there is no 'full' bit, see instr_cat3_full() */
491 uint32_t jmp_tgt : 1;
492 uint32_t sync : 1;
493 uint32_t opc_cat : 3;
494 } instr_cat3_t;
495
496 static inline bool instr_cat3_full(instr_cat3_t *cat3)
497 {
498 switch (_OPC(3, cat3->opc)) {
499 case OPC_MAD_F16:
500 case OPC_MAD_U16:
501 case OPC_MAD_S16:
502 case OPC_SEL_B16:
503 case OPC_SEL_S16:
504 case OPC_SEL_F16:
505 case OPC_SAD_S16:
506 case OPC_SAD_S32: // really??
507 return false;
508 default:
509 return true;
510 }
511 }
512
513 typedef struct PACKED { /* category 4 (single-source) instruction encoding: two 32-bit dwords */
514 /* dword0: (16-bit source descriptor with three overlapping views, then 16 unused bits) */
515 union PACKED {
516 struct PACKED {
517 uint32_t src : 11;
518 uint32_t must_be_zero1: 2;
519 uint32_t src_im : 1; /* immediate */
520 uint32_t src_neg : 1; /* negate */
521 uint32_t src_abs : 1; /* absolute value */
522 };
523 struct PACKED {
524 uint32_t src : 10;
525 uint32_t src_c : 1; /* relative-const */
526 uint32_t src_rel : 1; /* relative address */
527 uint32_t must_be_zero : 1;
528 uint32_t dummy : 3;
529 } rel;
530 struct PACKED {
531 uint32_t src : 12;
532 uint32_t src_c : 1; /* const */
533 uint32_t dummy : 3;
534 } c;
535 };
536 uint32_t dummy1 : 16; /* seem to be ignored */
537
538 /* dword1: (field widths below sum to 32 bits) */
539 uint32_t dst : 8;
540 uint32_t repeat : 2; /* returned by instr_repeat() for category 4 */
541 uint32_t sat : 1; /* returned by instr_sat() for category 4 */
542 uint32_t src_r : 1;
543 uint32_t ss : 1;
544 uint32_t ul : 1;
545 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
546 uint32_t dummy2 : 5; /* seem to be ignored */
547 uint32_t full : 1; /* not half */
548 uint32_t opc : 6; /* opcode within category 4 */
549 uint32_t jmp_tgt : 1;
550 uint32_t sync : 1;
551 uint32_t opc_cat : 3;
552 } instr_cat4_t;
553
554 typedef struct PACKED { /* category 5 instruction encoding: two 32-bit dwords */
555 /* dword0: (three overlapping 32-bit views) */
556 union PACKED {
557 /* normal case: */
558 struct PACKED {
559 uint32_t full : 1; /* not half */
560 uint32_t src1 : 8;
561 uint32_t src2 : 8;
562 uint32_t dummy1 : 4; /* seem to be ignored */
563 uint32_t samp : 4;
564 uint32_t tex : 7;
565 } norm;
566 /* s2en case: (presumably selected by is_s2en in dword1 -- TODO confirm) */
567 struct PACKED {
568 uint32_t full : 1; /* not half */
569 uint32_t src1 : 8;
570 uint32_t src2 : 11;
571 uint32_t dummy1 : 1;
572 uint32_t src3 : 8;
573 uint32_t dummy2 : 3;
574 } s2en;
575 /* same in either case: */
576 // XXX I think, confirm this
577 struct PACKED {
578 uint32_t full : 1; /* not half */
579 uint32_t src1 : 8;
580 uint32_t pad : 23;
581 };
582 };
583
584 /* dword1: (field widths below sum to 32 bits) */
585 uint32_t dst : 8;
586 uint32_t wrmask : 4; /* write-mask */
587 uint32_t type : 3;
588 uint32_t dummy2 : 1; /* seems to be ignored */
589 uint32_t is_3d : 1;
590
591 uint32_t is_a : 1;
592 uint32_t is_s : 1;
593 uint32_t is_s2en : 1;
594 uint32_t is_o : 1;
595 uint32_t is_p : 1;
596
597 uint32_t opc : 5; /* opcode within category 5 */
598 uint32_t jmp_tgt : 1;
599 uint32_t sync : 1;
600 uint32_t opc_cat : 3;
601 } instr_cat5_t;
602
603 /* dword0 encoding for src_off: [src1 + off], src2: (one of the overlapping views in instr_cat6_t) */
604 typedef struct PACKED {
605 /* dword0: (field widths sum to 32 bits) */
606 uint32_t mustbe1 : 1; /* same bit as instr_cat6_t.src_off */
607 int32_t off : 13; /* signed offset */
608 uint32_t src1 : 8;
609 uint32_t src1_im : 1;
610 uint32_t src2_im : 1;
611 uint32_t src2 : 8;
612
613 /* dword1: (opaque here; described by the c/d views) */
614 uint32_t dword1;
615 } instr_cat6a_t;
616
617 /* dword0 encoding for !src_off: [src1], src2 (one of the overlapping views in instr_cat6_t) */
618 typedef struct PACKED {
619 /* dword0: (field widths sum to 32 bits) */
620 uint32_t mustbe0 : 1; /* same bit as instr_cat6_t.src_off */
621 uint32_t src1 : 13;
622 uint32_t ignore0 : 8;
623 uint32_t src1_im : 1;
624 uint32_t src2_im : 1;
625 uint32_t src2 : 8;
626
627 /* dword1: (opaque here; described by the c/d views) */
628 uint32_t dword1;
629 } instr_cat6b_t;
630
631 /* dword1 encoding for dst_off: (one of the overlapping views in instr_cat6_t) */
632 typedef struct PACKED {
633 /* dword0: (opaque here; described by the a/b views) */
634 uint32_t dword0;
635
636 /* note: there is some weird stuff going on where sometimes
637 * cat6->a.off is involved.. but that seems like a bug in
638 * the blob, since it is used even if !cat6->src_off
639 * It would make sense for there to be some more bits to
640 * bring us to 11 bits worth of offset, but not sure..
641 */
642 int32_t off : 8; /* signed offset */
643 uint32_t mustbe1 : 1; /* same bit as instr_cat6_t.dst_off */
644 uint32_t dst : 8;
645 uint32_t pad1 : 15; /* covers the type/g/opc/jmp_tgt/sync/opc_cat bits of instr_cat6_t */
646 } instr_cat6c_t;
647
648 /* dword1 encoding for !dst_off: (one of the overlapping views in instr_cat6_t) */
649 typedef struct PACKED {
650 /* dword0: (opaque here; described by the a/b views) */
651 uint32_t dword0;
652
653 uint32_t dst : 8;
654 uint32_t mustbe0 : 1; /* same bit as instr_cat6_t.dst_off */
655 uint32_t idx : 8;
656 uint32_t pad0 : 15; /* covers the type/g/opc/jmp_tgt/sync/opc_cat bits of instr_cat6_t */
657 } instr_cat6d_t;
658
659 /* ldgb and atomics.. (one of the overlapping views in instr_cat6_t)
660 *
661 * ldgb: pad0=0, pad3=1
662 * atomic .g: pad0=1, pad3=1
663 * .l: pad0=1, pad3=0
664 */
665 typedef struct PACKED {
666 /* dword0: (field widths sum to 32 bits) */
667 uint32_t pad0 : 1;
668 uint32_t src3 : 8;
669 uint32_t d : 2;
670 uint32_t typed : 1;
671 uint32_t type_size : 2;
672 uint32_t src1 : 8;
673 uint32_t src1_im : 1;
674 uint32_t src2_im : 1;
675 uint32_t src2 : 8;
676
677 /* dword1: (field widths sum to 32 bits) */
678 uint32_t dst : 8;
679 uint32_t mustbe0 : 1;
680 uint32_t src_ssbo : 8;
681 uint32_t pad2 : 3; // type
682 uint32_t g : 1;
683 uint32_t pad3 : 1;
684 uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
685 } instr_cat6ldgb_t;
686
687 /* stgb, pad0=0, pad3=2 (one of the overlapping views in instr_cat6_t)
688 */
689 typedef struct PACKED {
690 /* dword0: (field widths sum to 32 bits) */
691 uint32_t mustbe1 : 1; // ???
692 uint32_t src1 : 8;
693 uint32_t d : 2;
694 uint32_t typed : 1;
695 uint32_t type_size : 2;
696 uint32_t pad0 : 9;
697 uint32_t src2_im : 1;
698 uint32_t src2 : 8;
699
700 /* dword1: (field widths sum to 32 bits) */
701 uint32_t src3 : 8;
702 uint32_t src3_im : 1;
703 uint32_t dst_ssbo : 8;
704 uint32_t pad2 : 3; // type
705 uint32_t pad3 : 2; /* two bits wide here, so it spans the 'g' and 'pad3' bits of the ldgb view */
706 uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
707 } instr_cat6stgb_t;
708
709 typedef union PACKED { /* category 6 encoding (is_cat6_legacy() calls these the "legacy" encodings): overlapping views of the same two dwords */
710 instr_cat6a_t a; /* dword0 when src_off is set */
711 instr_cat6b_t b; /* dword0 when src_off is clear */
712 instr_cat6c_t c; /* dword1 when dst_off is set */
713 instr_cat6d_t d; /* dword1 when dst_off is clear */
714 instr_cat6ldgb_t ldgb;
715 instr_cat6stgb_t stgb;
716 struct PACKED { /* fields common to the views above */
717 /* dword0: */
718 uint32_t src_off : 1;
719 uint32_t pad1 : 31;
720
721 /* dword1: (field widths sum to 32 bits) */
722 uint32_t pad2 : 8;
723 uint32_t dst_off : 1;
724 uint32_t pad3 : 8;
725 uint32_t type : 3;
726 uint32_t g : 1; /* or in some cases it means dst immed */
727 uint32_t pad4 : 1;
728 uint32_t opc : 5; /* opcode within category 6; read by instr_opc() for legacy encodings */
729 uint32_t jmp_tgt : 1;
730 uint32_t sync : 1;
731 uint32_t opc_cat : 3;
732 };
733 } instr_cat6_t;
734
735 /**
736 * Category 6 encoding that is_cat6_legacy() detects as the new (non-legacy)
737 * form and asserts is only seen when gpu_id >= 600.
738 *
739 * For atomic ops (which return a value):
740 * pad1=1, pad2=c, pad3=0, pad4=3
741 * src1 - vecN offset/coords
742 * src2.x - is actually dest register
743 * src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
744 * and src2.z is 'data'
745 *
746 * For stib (which does not return a value):
747 * pad1=0, pad2=c, pad3=0, pad4=2
748 * src1 - vecN offset/coords
749 * src2 - value to store
750 *
751 * For ldib:
752 * pad1=1, pad2=c, pad3=0, pad4=2
753 * src1 - vecN offset/coords
754 *
755 * for ldc (load from UBO using descriptor):
756 * pad1=0, pad2=8, pad3=0, pad4=2
757 */
756 typedef struct PACKED {
757 /* dword0: (field widths sum to 32 bits) */
758 uint32_t pad1 : 9;
759 uint32_t d : 2;
760 uint32_t typed : 1;
761 uint32_t type_size : 2;
762 uint32_t opc : 5; /* note: the opcode sits in dword0 here, unlike instr_cat6_t */
763 uint32_t pad2 : 5; /* bit 0x8 is tested by is_cat6_legacy() */
764 uint32_t src1 : 8; /* coordinate/offset */
765
766 /* dword1: (field widths sum to 32 bits) */
767 uint32_t src2 : 8; /* or the dst for load instructions */
768 uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo??
769 uint32_t ssbo : 8; /* ssbo/image binding point */
770 uint32_t type : 3;
771 uint32_t pad4 : 7; /* bit 0x2 is tested by is_cat6_legacy() */
772 uint32_t jmp_tgt : 1;
773 uint32_t sync : 1;
774 uint32_t opc_cat : 3;
775 } instr_cat6_a6xx_t;
776
777 typedef struct PACKED { /* category 7 instruction encoding: two 32-bit dwords */
778 /* dword0: (no fields decoded) */
779 uint32_t pad1 : 32;
780
781 /* dword1: (field widths below sum to 32 bits) */
782 uint32_t pad2 : 12;
783 uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */
784 uint32_t pad3 : 6;
785 uint32_t w : 1; /* write */
786 uint32_t r : 1; /* read */
787 uint32_t l : 1; /* local */
788 uint32_t g : 1; /* global */
789 uint32_t opc : 4; /* presumed, but only a couple known OPCs */
790 uint32_t jmp_tgt : 1; /* (jp) */
791 uint32_t sync : 1; /* (sy) */
792 uint32_t opc_cat : 3;
793 } instr_cat7_t;
794
795 typedef union PACKED { /* one two-dword instruction; opc_cat says which per-category view applies */
796 instr_cat0_t cat0;
797 instr_cat1_t cat1;
798 instr_cat2_t cat2;
799 instr_cat3_t cat3;
800 instr_cat4_t cat4;
801 instr_cat5_t cat5;
802 instr_cat6_t cat6;
803 instr_cat6_a6xx_t cat6_a6xx; /* alternative cat6 view; chosen via is_cat6_legacy() */
804 instr_cat7_t cat7;
805 struct PACKED { /* fields at the same position in the per-category views that have them */
806 /* dword0: */
807 uint32_t pad1 : 32;
808
809 /* dword1: (field widths below sum to 32 bits) */
810 uint32_t pad2 : 12;
811 uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
812 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
813 uint32_t pad3 : 13;
814 uint32_t jmp_tgt : 1;
815 uint32_t sync : 1;
816 uint32_t opc_cat : 3; /* switched on by instr_repeat(), instr_sat() and instr_opc() */
817
818 };
819 } instr_t;
820
821 static inline uint32_t instr_repeat(instr_t *instr)
822 {
823 switch (instr->opc_cat) {
824 case 0: return instr->cat0.repeat;
825 case 1: return instr->cat1.repeat;
826 case 2: return instr->cat2.repeat;
827 case 3: return instr->cat3.repeat;
828 case 4: return instr->cat4.repeat;
829 default: return 0;
830 }
831 }
832
833 static inline bool instr_sat(instr_t *instr)
834 {
835 switch (instr->opc_cat) {
836 case 2: return instr->cat2.sat;
837 case 3: return instr->cat3.sat;
838 case 4: return instr->cat4.sat;
839 default: return false;
840 }
841 }
842
843 /* We can probably drop the gpu_id arg, but keeping it for now so we can
844 * assert if we see something we think should be new encoding on an older
845 * gpu.
846 */
847 static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
848 {
849 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
850
851 /* At least one of these two bits is pad in all the possible
852 * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
853 * cmdstream traces I have indicates that the pad bit is zero
854 * in all cases. So we can use this to detect new encoding:
855 */
856 if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
857 assert(gpu_id >= 600);
858 assert(instr->cat6.opc == 0);
859 return false;
860 }
861
862 return true;
863 }
864
865 static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
866 {
867 switch (instr->opc_cat) {
868 case 0: return instr->cat0.opc;
869 case 1: return 0;
870 case 2: return instr->cat2.opc;
871 case 3: return instr->cat3.opc;
872 case 4: return instr->cat4.opc;
873 case 5: return instr->cat5.opc;
874 case 6:
875 if (!is_cat6_legacy(instr, gpu_id))
876 return instr->cat6_a6xx.opc;
877 return instr->cat6.opc;
878 case 7: return instr->cat7.opc;
879 default: return 0;
880 }
881 }
882
883 static inline bool is_mad(opc_t opc)
884 {
885 switch (opc) {
886 case OPC_MAD_U16:
887 case OPC_MAD_S16:
888 case OPC_MAD_U24:
889 case OPC_MAD_S24:
890 case OPC_MAD_F16:
891 case OPC_MAD_F32:
892 return true;
893 default:
894 return false;
895 }
896 }
897
898 static inline bool is_madsh(opc_t opc)
899 {
900 switch (opc) {
901 case OPC_MADSH_U16:
902 case OPC_MADSH_M16:
903 return true;
904 default:
905 return false;
906 }
907 }
908
909 static inline bool is_atomic(opc_t opc)
910 {
911 switch (opc) {
912 case OPC_ATOMIC_ADD:
913 case OPC_ATOMIC_SUB:
914 case OPC_ATOMIC_XCHG:
915 case OPC_ATOMIC_INC:
916 case OPC_ATOMIC_DEC:
917 case OPC_ATOMIC_CMPXCHG:
918 case OPC_ATOMIC_MIN:
919 case OPC_ATOMIC_MAX:
920 case OPC_ATOMIC_AND:
921 case OPC_ATOMIC_OR:
922 case OPC_ATOMIC_XOR:
923 return true;
924 default:
925 return false;
926 }
927 }
928
929 static inline bool is_ssbo(opc_t opc)
930 {
931 switch (opc) {
932 case OPC_RESFMT:
933 case OPC_RESINFO:
934 case OPC_LDGB:
935 case OPC_STGB:
936 case OPC_STIB:
937 return true;
938 default:
939 return false;
940 }
941 }
942
943 static inline bool is_isam(opc_t opc)
944 {
945 switch (opc) {
946 case OPC_ISAM:
947 case OPC_ISAML:
948 case OPC_ISAMM:
949 return true;
950 default:
951 return false;
952 }
953 }
954
/* Declared here, defined elsewhere.  NOTE(review): presumably disassembles
 * 'sizedwords' dwords starting at 'dwords' and prints the result to 'out';
 * the meaning of 'level' and of the return value is not visible from this
 * header -- confirm against the implementation.
 */
955 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
956
957 #endif /* INSTR_A3XX_H_ */