Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / freedreno / ir3 / instr-a3xx.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
27 #define PACKED __attribute__((__packed__))
28
29 #include <stdint.h>
30 #include <assert.h>
31
32 typedef enum {
33 /* category 0: */
34 OPC_NOP = 0,
35 OPC_BR = 1,
36 OPC_JUMP = 2,
37 OPC_CALL = 3,
38 OPC_RET = 4,
39 OPC_KILL = 5,
40 OPC_END = 6,
41 OPC_EMIT = 7,
42 OPC_CUT = 8,
43 OPC_CHMASK = 9,
44 OPC_CHSH = 10,
45 OPC_FLOW_REV = 11,
46
47 /* category 1: */
48 /* no opc.. all category 1 are variants of mov */
49
50 /* category 2: */
51 OPC_ADD_F = 0,
52 OPC_MIN_F = 1,
53 OPC_MAX_F = 2,
54 OPC_MUL_F = 3,
55 OPC_SIGN_F = 4,
56 OPC_CMPS_F = 5,
57 OPC_ABSNEG_F = 6,
58 OPC_CMPV_F = 7,
59 /* 8 - invalid */
60 OPC_FLOOR_F = 9,
61 OPC_CEIL_F = 10,
62 OPC_RNDNE_F = 11,
63 OPC_RNDAZ_F = 12,
64 OPC_TRUNC_F = 13,
65 /* 14-15 - invalid */
66 OPC_ADD_U = 16,
67 OPC_ADD_S = 17,
68 OPC_SUB_U = 18,
69 OPC_SUB_S = 19,
70 OPC_CMPS_U = 20,
71 OPC_CMPS_S = 21,
72 OPC_MIN_U = 22,
73 OPC_MIN_S = 23,
74 OPC_MAX_U = 24,
75 OPC_MAX_S = 25,
76 OPC_ABSNEG_S = 26,
77 /* 27 - invalid */
78 OPC_AND_B = 28,
79 OPC_OR_B = 29,
80 OPC_NOT_B = 30,
81 OPC_XOR_B = 31,
82 /* 32 - invalid */
83 OPC_CMPV_U = 33,
84 OPC_CMPV_S = 34,
85 /* 35-47 - invalid */
86 OPC_MUL_U = 48,
87 OPC_MUL_S = 49,
88 OPC_MULL_U = 50,
89 OPC_BFREV_B = 51,
90 OPC_CLZ_S = 52,
91 OPC_CLZ_B = 53,
92 OPC_SHL_B = 54,
93 OPC_SHR_B = 55,
94 OPC_ASHR_B = 56,
95 OPC_BARY_F = 57,
96 OPC_MGEN_B = 58,
97 OPC_GETBIT_B = 59,
98 OPC_SETRM = 60,
99 OPC_CBITS_B = 61,
100 OPC_SHB = 62,
101 OPC_MSAD = 63,
102
103 /* category 3: */
104 OPC_MAD_U16 = 0,
105 OPC_MADSH_U16 = 1,
106 OPC_MAD_S16 = 2,
107 OPC_MADSH_M16 = 3, /* should this be .s16? */
108 OPC_MAD_U24 = 4,
109 OPC_MAD_S24 = 5,
110 OPC_MAD_F16 = 6,
111 OPC_MAD_F32 = 7,
112 OPC_SEL_B16 = 8,
113 OPC_SEL_B32 = 9,
114 OPC_SEL_S16 = 10,
115 OPC_SEL_S32 = 11,
116 OPC_SEL_F16 = 12,
117 OPC_SEL_F32 = 13,
118 OPC_SAD_S16 = 14,
119 OPC_SAD_S32 = 15,
120
121 /* category 4: */
122 OPC_RCP = 0,
123 OPC_RSQ = 1,
124 OPC_LOG2 = 2,
125 OPC_EXP2 = 3,
126 OPC_SIN = 4,
127 OPC_COS = 5,
128 OPC_SQRT = 6,
129 // 7-63 - invalid
130
131 /* category 5: */
132 OPC_ISAM = 0,
133 OPC_ISAML = 1,
134 OPC_ISAMM = 2,
135 OPC_SAM = 3,
136 OPC_SAMB = 4,
137 OPC_SAML = 5,
138 OPC_SAMGQ = 6,
139 OPC_GETLOD = 7,
140 OPC_CONV = 8,
141 OPC_CONVM = 9,
142 OPC_GETSIZE = 10,
143 OPC_GETBUF = 11,
144 OPC_GETPOS = 12,
145 OPC_GETINFO = 13,
146 OPC_DSX = 14,
147 OPC_DSY = 15,
148 OPC_GATHER4R = 16,
149 OPC_GATHER4G = 17,
150 OPC_GATHER4B = 18,
151 OPC_GATHER4A = 19,
152 OPC_SAMGP0 = 20,
153 OPC_SAMGP1 = 21,
154 OPC_SAMGP2 = 22,
155 OPC_SAMGP3 = 23,
156 OPC_DSXPP_1 = 24,
157 OPC_DSYPP_1 = 25,
158 OPC_RGETPOS = 26,
159 OPC_RGETINFO = 27,
160
161 /* category 6: */
162 OPC_LDG = 0, /* load-global */
163 OPC_LDL = 1,
164 OPC_LDP = 2,
165 OPC_STG = 3, /* store-global */
166 OPC_STL = 4,
167 OPC_STP = 5,
168 OPC_STI = 6,
169 OPC_G2L = 7,
170 OPC_L2G = 8,
171 OPC_PREFETCH = 9,
172 OPC_LDLW = 10,
173 OPC_STLW = 11,
174 OPC_RESFMT = 14,
175 OPC_RESINFO = 15,
176 OPC_ATOMIC_ADD = 16,
177 OPC_ATOMIC_SUB = 17,
178 OPC_ATOMIC_XCHG = 18,
179 OPC_ATOMIC_INC = 19,
180 OPC_ATOMIC_DEC = 20,
181 OPC_ATOMIC_CMPXCHG = 21,
182 OPC_ATOMIC_MIN = 22,
183 OPC_ATOMIC_MAX = 23,
184 OPC_ATOMIC_AND = 24,
185 OPC_ATOMIC_OR = 25,
186 OPC_ATOMIC_XOR = 26,
187 OPC_LDGB_TYPED_4D = 27,
188 OPC_STGB_4D_4 = 28,
189 OPC_STIB = 29,
190 OPC_LDC_4 = 30,
191 OPC_LDLV = 31,
192
193 /* meta instructions (category -1): */
194 /* placeholder instr to mark shader inputs: */
195 OPC_META_INPUT = 0,
196 OPC_META_PHI = 1,
197 /* The "fan-in" and "fan-out" instructions are used for keeping
198 * track of instructions that write to multiple dst registers
199 * (fan-out) like texture sample instructions, or read multiple
200 * consecutive scalar registers (fan-in) (bary.f, texture samp)
201 */
202 OPC_META_FO = 2,
203 OPC_META_FI = 3,
204
205 } opc_t;
206
207 typedef enum {
208 TYPE_F16 = 0,
209 TYPE_F32 = 1,
210 TYPE_U16 = 2,
211 TYPE_U32 = 3,
212 TYPE_S16 = 4,
213 TYPE_S32 = 5,
214 TYPE_U8 = 6,
215 TYPE_S8 = 7, // XXX I assume?
216 } type_t;
217
218 static inline uint32_t type_size(type_t type)
219 {
220 switch (type) {
221 case TYPE_F32:
222 case TYPE_U32:
223 case TYPE_S32:
224 return 32;
225 case TYPE_F16:
226 case TYPE_U16:
227 case TYPE_S16:
228 return 16;
229 case TYPE_U8:
230 case TYPE_S8:
231 return 8;
232 default:
233 assert(0); /* invalid type */
234 return 0;
235 }
236 }
237
238 static inline int type_float(type_t type)
239 {
240 return (type == TYPE_F32) || (type == TYPE_F16);
241 }
242
243 static inline int type_uint(type_t type)
244 {
245 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
246 }
247
248 static inline int type_sint(type_t type)
249 {
250 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
251 }
252
253 typedef union PACKED {
254 /* normal gpr or const src register: */
255 struct PACKED {
256 uint32_t comp : 2;
257 uint32_t num : 10;
258 };
259 /* for immediate val: */
260 int32_t iim_val : 11;
261 /* to make compiler happy: */
262 uint32_t dummy32;
263 uint32_t dummy10 : 10;
264 uint32_t dummy11 : 11;
265 uint32_t dummy12 : 12;
266 uint32_t dummy13 : 13;
267 uint32_t dummy8 : 8;
268 } reg_t;
269
270 /* special registers: */
271 #define REG_A0 61 /* address register */
272 #define REG_P0 62 /* predicate register */
273
274 static inline int reg_special(reg_t reg)
275 {
276 return (reg.num == REG_A0) || (reg.num == REG_P0);
277 }
278
279 typedef struct PACKED {
280 /* dword0: */
281 union PACKED {
282 struct PACKED {
283 int16_t immed : 16;
284 uint32_t dummy1 : 16;
285 } a3xx;
286 struct PACKED {
287 int32_t immed : 20;
288 uint32_t dummy1 : 12;
289 } a4xx;
290 };
291
292 /* dword1: */
293 uint32_t dummy2 : 8;
294 uint32_t repeat : 3;
295 uint32_t dummy3 : 1;
296 uint32_t ss : 1;
297 uint32_t dummy4 : 7;
298 uint32_t inv : 1;
299 uint32_t comp : 2;
300 uint32_t opc : 4;
301 uint32_t jmp_tgt : 1;
302 uint32_t sync : 1;
303 uint32_t opc_cat : 3;
304 } instr_cat0_t;
305
306 typedef struct PACKED {
307 /* dword0: */
308 union PACKED {
309 /* for normal src register: */
310 struct PACKED {
311 uint32_t src : 11;
312 /* at least low bit of pad must be zero or it will
313 * look like a address relative src
314 */
315 uint32_t pad : 21;
316 };
317 /* for address relative: */
318 struct PACKED {
319 int32_t off : 10;
320 uint32_t src_rel_c : 1;
321 uint32_t src_rel : 1;
322 uint32_t unknown : 20;
323 };
324 /* for immediate: */
325 int32_t iim_val;
326 uint32_t uim_val;
327 float fim_val;
328 };
329
330 /* dword1: */
331 uint32_t dst : 8;
332 uint32_t repeat : 3;
333 uint32_t src_r : 1;
334 uint32_t ss : 1;
335 uint32_t ul : 1;
336 uint32_t dst_type : 3;
337 uint32_t dst_rel : 1;
338 uint32_t src_type : 3;
339 uint32_t src_c : 1;
340 uint32_t src_im : 1;
341 uint32_t even : 1;
342 uint32_t pos_inf : 1;
343 uint32_t must_be_0 : 2;
344 uint32_t jmp_tgt : 1;
345 uint32_t sync : 1;
346 uint32_t opc_cat : 3;
347 } instr_cat1_t;
348
349 typedef struct PACKED {
350 /* dword0: */
351 union PACKED {
352 struct PACKED {
353 uint32_t src1 : 11;
354 uint32_t must_be_zero1: 2;
355 uint32_t src1_im : 1; /* immediate */
356 uint32_t src1_neg : 1; /* negate */
357 uint32_t src1_abs : 1; /* absolute value */
358 };
359 struct PACKED {
360 uint32_t src1 : 10;
361 uint32_t src1_c : 1; /* relative-const */
362 uint32_t src1_rel : 1; /* relative address */
363 uint32_t must_be_zero : 1;
364 uint32_t dummy : 3;
365 } rel1;
366 struct PACKED {
367 uint32_t src1 : 12;
368 uint32_t src1_c : 1; /* const */
369 uint32_t dummy : 3;
370 } c1;
371 };
372
373 union PACKED {
374 struct PACKED {
375 uint32_t src2 : 11;
376 uint32_t must_be_zero2: 2;
377 uint32_t src2_im : 1; /* immediate */
378 uint32_t src2_neg : 1; /* negate */
379 uint32_t src2_abs : 1; /* absolute value */
380 };
381 struct PACKED {
382 uint32_t src2 : 10;
383 uint32_t src2_c : 1; /* relative-const */
384 uint32_t src2_rel : 1; /* relative address */
385 uint32_t must_be_zero : 1;
386 uint32_t dummy : 3;
387 } rel2;
388 struct PACKED {
389 uint32_t src2 : 12;
390 uint32_t src2_c : 1; /* const */
391 uint32_t dummy : 3;
392 } c2;
393 };
394
395 /* dword1: */
396 uint32_t dst : 8;
397 uint32_t repeat : 3;
398 uint32_t src1_r : 1;
399 uint32_t ss : 1;
400 uint32_t ul : 1; /* dunno */
401 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
402 uint32_t ei : 1;
403 uint32_t cond : 3;
404 uint32_t src2_r : 1;
405 uint32_t full : 1; /* not half */
406 uint32_t opc : 6;
407 uint32_t jmp_tgt : 1;
408 uint32_t sync : 1;
409 uint32_t opc_cat : 3;
410 } instr_cat2_t;
411
412 typedef struct PACKED {
413 /* dword0: */
414 union PACKED {
415 struct PACKED {
416 uint32_t src1 : 11;
417 uint32_t must_be_zero1: 2;
418 uint32_t src2_c : 1;
419 uint32_t src1_neg : 1;
420 uint32_t src2_r : 1;
421 };
422 struct PACKED {
423 uint32_t src1 : 10;
424 uint32_t src1_c : 1;
425 uint32_t src1_rel : 1;
426 uint32_t must_be_zero : 1;
427 uint32_t dummy : 3;
428 } rel1;
429 struct PACKED {
430 uint32_t src1 : 12;
431 uint32_t src1_c : 1;
432 uint32_t dummy : 3;
433 } c1;
434 };
435
436 union PACKED {
437 struct PACKED {
438 uint32_t src3 : 11;
439 uint32_t must_be_zero2: 2;
440 uint32_t src3_r : 1;
441 uint32_t src2_neg : 1;
442 uint32_t src3_neg : 1;
443 };
444 struct PACKED {
445 uint32_t src3 : 10;
446 uint32_t src3_c : 1;
447 uint32_t src3_rel : 1;
448 uint32_t must_be_zero : 1;
449 uint32_t dummy : 3;
450 } rel2;
451 struct PACKED {
452 uint32_t src3 : 12;
453 uint32_t src3_c : 1;
454 uint32_t dummy : 3;
455 } c2;
456 };
457
458 /* dword1: */
459 uint32_t dst : 8;
460 uint32_t repeat : 3;
461 uint32_t src1_r : 1;
462 uint32_t ss : 1;
463 uint32_t ul : 1;
464 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
465 uint32_t src2 : 8;
466 uint32_t opc : 4;
467 uint32_t jmp_tgt : 1;
468 uint32_t sync : 1;
469 uint32_t opc_cat : 3;
470 } instr_cat3_t;
471
472 static inline bool instr_cat3_full(instr_cat3_t *cat3)
473 {
474 switch (cat3->opc) {
475 case OPC_MAD_F16:
476 case OPC_MAD_U16:
477 case OPC_MAD_S16:
478 case OPC_SEL_B16:
479 case OPC_SEL_S16:
480 case OPC_SEL_F16:
481 case OPC_SAD_S16:
482 case OPC_SAD_S32: // really??
483 return false;
484 default:
485 return true;
486 }
487 }
488
489 typedef struct PACKED {
490 /* dword0: */
491 union PACKED {
492 struct PACKED {
493 uint32_t src : 11;
494 uint32_t must_be_zero1: 2;
495 uint32_t src_im : 1; /* immediate */
496 uint32_t src_neg : 1; /* negate */
497 uint32_t src_abs : 1; /* absolute value */
498 };
499 struct PACKED {
500 uint32_t src : 10;
501 uint32_t src_c : 1; /* relative-const */
502 uint32_t src_rel : 1; /* relative address */
503 uint32_t must_be_zero : 1;
504 uint32_t dummy : 3;
505 } rel;
506 struct PACKED {
507 uint32_t src : 12;
508 uint32_t src_c : 1; /* const */
509 uint32_t dummy : 3;
510 } c;
511 };
512 uint32_t dummy1 : 16; /* seem to be ignored */
513
514 /* dword1: */
515 uint32_t dst : 8;
516 uint32_t repeat : 3;
517 uint32_t src_r : 1;
518 uint32_t ss : 1;
519 uint32_t ul : 1;
520 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
521 uint32_t dummy2 : 5; /* seem to be ignored */
522 uint32_t full : 1; /* not half */
523 uint32_t opc : 6;
524 uint32_t jmp_tgt : 1;
525 uint32_t sync : 1;
526 uint32_t opc_cat : 3;
527 } instr_cat4_t;
528
529 typedef struct PACKED {
530 /* dword0: */
531 union PACKED {
532 /* normal case: */
533 struct PACKED {
534 uint32_t full : 1; /* not half */
535 uint32_t src1 : 8;
536 uint32_t src2 : 8;
537 uint32_t dummy1 : 4; /* seem to be ignored */
538 uint32_t samp : 4;
539 uint32_t tex : 7;
540 } norm;
541 /* s2en case: */
542 struct PACKED {
543 uint32_t full : 1; /* not half */
544 uint32_t src1 : 8;
545 uint32_t src2 : 11;
546 uint32_t dummy1 : 1;
547 uint32_t src3 : 8;
548 uint32_t dummy2 : 3;
549 } s2en;
550 /* same in either case: */
551 // XXX I think, confirm this
552 struct PACKED {
553 uint32_t full : 1; /* not half */
554 uint32_t src1 : 8;
555 uint32_t pad : 23;
556 };
557 };
558
559 /* dword1: */
560 uint32_t dst : 8;
561 uint32_t wrmask : 4; /* write-mask */
562 uint32_t type : 3;
563 uint32_t dummy2 : 1; /* seems to be ignored */
564 uint32_t is_3d : 1;
565
566 uint32_t is_a : 1;
567 uint32_t is_s : 1;
568 uint32_t is_s2en : 1;
569 uint32_t is_o : 1;
570 uint32_t is_p : 1;
571
572 uint32_t opc : 5;
573 uint32_t jmp_tgt : 1;
574 uint32_t sync : 1;
575 uint32_t opc_cat : 3;
576 } instr_cat5_t;
577
578 /* dword0 encoding for src_off: [src1 + off], src2: */
579 typedef struct PACKED {
580 /* dword0: */
581 uint32_t mustbe1 : 1;
582 int32_t off : 13;
583 uint32_t src1 : 8;
584 uint32_t src1_im : 1;
585 uint32_t src2_im : 1;
586 uint32_t src2 : 8;
587
588 /* dword1: */
589 uint32_t dword1;
590 } instr_cat6a_t;
591
592 /* dword0 encoding for !src_off: [src1], src2 */
593 typedef struct PACKED {
594 /* dword0: */
595 uint32_t mustbe0 : 1;
596 uint32_t src1 : 13;
597 uint32_t ignore0 : 8;
598 uint32_t src1_im : 1;
599 uint32_t src2_im : 1;
600 uint32_t src2 : 8;
601
602 /* dword1: */
603 uint32_t dword1;
604 } instr_cat6b_t;
605
606 /* dword1 encoding for dst_off: */
607 typedef struct PACKED {
608 /* dword0: */
609 uint32_t dword0;
610
611 /* note: there is some weird stuff going on where sometimes
612 * cat6->a.off is involved.. but that seems like a bug in
613 * the blob, since it is used even if !cat6->src_off
614 * It would make sense for there to be some more bits to
615 * bring us to 11 bits worth of offset, but not sure..
616 */
617 int32_t off : 8;
618 uint32_t mustbe1 : 1;
619 uint32_t dst : 8;
620 uint32_t pad1 : 15;
621 } instr_cat6c_t;
622
623 /* dword1 encoding for !dst_off: */
624 typedef struct PACKED {
625 /* dword0: */
626 uint32_t dword0;
627
628 uint32_t dst : 8;
629 uint32_t mustbe0 : 1;
630 uint32_t pad0 : 23;
631 } instr_cat6d_t;
632
633 /* I think some of the other cat6 instructions use additional
634 * sub-encodings..
635 */
636
637 typedef union PACKED {
638 instr_cat6a_t a;
639 instr_cat6b_t b;
640 instr_cat6c_t c;
641 instr_cat6d_t d;
642 struct PACKED {
643 /* dword0: */
644 uint32_t src_off : 1;
645 uint32_t pad1 : 31;
646
647 /* dword1: */
648 uint32_t pad2 : 8;
649 uint32_t dst_off : 1;
650 uint32_t pad3 : 8;
651 uint32_t type : 3;
652 uint32_t g : 1; /* or in some cases it means dst immed */
653 uint32_t pad4 : 1;
654 uint32_t opc : 5;
655 uint32_t jmp_tgt : 1;
656 uint32_t sync : 1;
657 uint32_t opc_cat : 3;
658 };
659 } instr_cat6_t;
660
661 typedef union PACKED {
662 instr_cat0_t cat0;
663 instr_cat1_t cat1;
664 instr_cat2_t cat2;
665 instr_cat3_t cat3;
666 instr_cat4_t cat4;
667 instr_cat5_t cat5;
668 instr_cat6_t cat6;
669 struct PACKED {
670 /* dword0: */
671 uint64_t pad1 : 40;
672 uint32_t repeat : 3; /* cat0-cat4 */
673 uint32_t pad2 : 1;
674 uint32_t ss : 1; /* cat1-cat4 (cat0??) */
675 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
676 uint32_t pad3 : 13;
677 uint32_t jmp_tgt : 1;
678 uint32_t sync : 1;
679 uint32_t opc_cat : 3;
680
681 };
682 } instr_t;
683
684 static inline uint32_t instr_opc(instr_t *instr)
685 {
686 switch (instr->opc_cat) {
687 case 0: return instr->cat0.opc;
688 case 1: return 0;
689 case 2: return instr->cat2.opc;
690 case 3: return instr->cat3.opc;
691 case 4: return instr->cat4.opc;
692 case 5: return instr->cat5.opc;
693 case 6: return instr->cat6.opc;
694 default: return 0;
695 }
696 }
697
698 static inline bool is_mad(opc_t opc)
699 {
700 switch (opc) {
701 case OPC_MAD_U16:
702 case OPC_MAD_S16:
703 case OPC_MAD_U24:
704 case OPC_MAD_S24:
705 case OPC_MAD_F16:
706 case OPC_MAD_F32:
707 return true;
708 default:
709 return false;
710 }
711 }
712
713 static inline bool is_madsh(opc_t opc)
714 {
715 switch (opc) {
716 case OPC_MADSH_U16:
717 case OPC_MADSH_M16:
718 return true;
719 default:
720 return false;
721 }
722 }
723
724 #endif /* INSTR_A3XX_H_ */