st/nine: Add D3DFMT_DF16 support
[mesa.git] / src / gallium / drivers / freedreno / ir3 / instr-a3xx.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
31
/*
 * Opcode values for every instruction category.
 *
 * NOTE: numbering restarts in each category (e.g. OPC_NOP, OPC_ADD_F and
 * OPC_MAD_U16 are all 0), so an opc_t value is only meaningful together
 * with the category it was taken from.
 */
typedef enum {
	/* category 0: */
	OPC_NOP              = 0,
	OPC_BR               = 1,
	OPC_JUMP             = 2,
	OPC_CALL             = 3,
	OPC_RET              = 4,
	OPC_KILL             = 5,
	OPC_END              = 6,
	OPC_EMIT             = 7,
	OPC_CUT              = 8,
	OPC_CHMASK           = 9,
	OPC_CHSH             = 10,
	OPC_FLOW_REV         = 11,

	/* category 1: */
	/* (no opcodes here: every category 1 instruction is a mov variant) */

	/* category 2: */
	OPC_ADD_F            = 0,
	OPC_MIN_F            = 1,
	OPC_MAX_F            = 2,
	OPC_MUL_F            = 3,
	OPC_SIGN_F           = 4,
	OPC_CMPS_F           = 5,
	OPC_ABSNEG_F         = 6,
	OPC_CMPV_F           = 7,
	/* 8 - invalid */
	OPC_FLOOR_F          = 9,
	OPC_CEIL_F           = 10,
	OPC_RNDNE_F          = 11,
	OPC_RNDAZ_F          = 12,
	OPC_TRUNC_F          = 13,
	/* 14-15 - invalid */
	OPC_ADD_U            = 16,
	OPC_ADD_S            = 17,
	OPC_SUB_U            = 18,
	OPC_SUB_S            = 19,
	OPC_CMPS_U           = 20,
	OPC_CMPS_S           = 21,
	OPC_MIN_U            = 22,
	OPC_MIN_S            = 23,
	OPC_MAX_U            = 24,
	OPC_MAX_S            = 25,
	OPC_ABSNEG_S         = 26,
	/* 27 - invalid */
	OPC_AND_B            = 28,
	OPC_OR_B             = 29,
	OPC_NOT_B            = 30,
	OPC_XOR_B            = 31,
	/* 32 - invalid */
	OPC_CMPV_U           = 33,
	OPC_CMPV_S           = 34,
	/* 35-47 - invalid */
	OPC_MUL_U            = 48,
	OPC_MUL_S            = 49,
	OPC_MULL_U           = 50,
	OPC_BFREV_B          = 51,
	OPC_CLZ_S            = 52,
	OPC_CLZ_B            = 53,
	OPC_SHL_B            = 54,
	OPC_SHR_B            = 55,
	OPC_ASHR_B           = 56,
	OPC_BARY_F           = 57,
	OPC_MGEN_B           = 58,
	OPC_GETBIT_B         = 59,
	OPC_SETRM            = 60,
	OPC_CBITS_B          = 61,
	OPC_SHB              = 62,
	OPC_MSAD             = 63,

	/* category 3: */
	OPC_MAD_U16          = 0,
	OPC_MADSH_U16        = 1,
	OPC_MAD_S16          = 2,
	OPC_MADSH_M16        = 3,   /* open question: should this be .s16? */
	OPC_MAD_U24          = 4,
	OPC_MAD_S24          = 5,
	OPC_MAD_F16          = 6,
	OPC_MAD_F32          = 7,
	OPC_SEL_B16          = 8,
	OPC_SEL_B32          = 9,
	OPC_SEL_S16          = 10,
	OPC_SEL_S32          = 11,
	OPC_SEL_F16          = 12,
	OPC_SEL_F32          = 13,
	OPC_SAD_S16          = 14,
	OPC_SAD_S32          = 15,

	/* category 4: */
	OPC_RCP              = 0,
	OPC_RSQ              = 1,
	OPC_LOG2             = 2,
	OPC_EXP2             = 3,
	OPC_SIN              = 4,
	OPC_COS              = 5,
	OPC_SQRT             = 6,
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM             = 0,
	OPC_ISAML            = 1,
	OPC_ISAMM            = 2,
	OPC_SAM              = 3,
	OPC_SAMB             = 4,
	OPC_SAML             = 5,
	OPC_SAMGQ            = 6,
	OPC_GETLOD           = 7,
	OPC_CONV             = 8,
	OPC_CONVM            = 9,
	OPC_GETSIZE          = 10,
	OPC_GETBUF           = 11,
	OPC_GETPOS           = 12,
	OPC_GETINFO          = 13,
	OPC_DSX              = 14,
	OPC_DSY              = 15,
	OPC_GATHER4R         = 16,
	OPC_GATHER4G         = 17,
	OPC_GATHER4B         = 18,
	OPC_GATHER4A         = 19,
	OPC_SAMGP0           = 20,
	OPC_SAMGP1           = 21,
	OPC_SAMGP2           = 22,
	OPC_SAMGP3           = 23,
	OPC_DSXPP_1          = 24,
	OPC_DSYPP_1          = 25,
	OPC_RGETPOS          = 26,
	OPC_RGETINFO         = 27,

	/* category 6: */
	OPC_LDG              = 0,   /* load-global */
	OPC_LDL              = 1,
	OPC_LDP              = 2,
	OPC_STG              = 3,   /* store-global */
	OPC_STL              = 4,
	OPC_STP              = 5,
	OPC_STI              = 6,
	OPC_G2L              = 7,
	OPC_L2G              = 8,
	OPC_PREFETCH         = 9,
	OPC_LDLW             = 10,
	OPC_STLW             = 11,
	/* 12-13 - no opcode defined here */
	OPC_RESFMT           = 14,
	OPC_RESINFO          = 15,
	OPC_ATOMIC_ADD_L     = 16,
	OPC_ATOMIC_SUB_L     = 17,
	OPC_ATOMIC_XCHG_L    = 18,
	OPC_ATOMIC_INC_L     = 19,
	OPC_ATOMIC_DEC_L     = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L     = 22,
	OPC_ATOMIC_MAX_L     = 23,
	OPC_ATOMIC_AND_L     = 24,
	OPC_ATOMIC_OR_L      = 25,
	OPC_ATOMIC_XOR_L     = 26,
	OPC_LDGB_TYPED_4D    = 27,
	OPC_STGB_4D_4        = 28,
	OPC_STIB             = 29,
	OPC_LDC_4            = 30,
	OPC_LDLV             = 31,

	/* meta instructions (category -1): */

	/* placeholders marking shader inputs/outputs: */
	OPC_META_INPUT       = 0,
	OPC_META_OUTPUT      = 1,
	/*
	 * "fan-out" tracks an instruction that writes several dst
	 * registers (e.g. texture sample instructions); "fan-in" tracks
	 * one that reads several consecutive scalar registers (bary.f,
	 * texture samp).
	 */
	OPC_META_FO          = 2,
	OPC_META_FI          = 3,
	/* branches/flow control: */
	OPC_META_FLOW        = 4,
	OPC_META_PHI         = 5,
	/* relative addressing: */
	OPC_META_DEREF       = 6,
} opc_t;
212
/* Data type codes; each one names a kind (float/unsigned/signed) and a width. */
typedef enum {
	/* 16 and 32 bit types: */
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	/* 8 bit types: */
	TYPE_U8  = 6,
	TYPE_S8  = 7,   /* XXX assumed, not confirmed */
} type_t;
223
224 static inline uint32_t type_size(type_t type)
225 {
226 switch (type) {
227 case TYPE_F32:
228 case TYPE_U32:
229 case TYPE_S32:
230 return 32;
231 case TYPE_F16:
232 case TYPE_U16:
233 case TYPE_S16:
234 return 16;
235 case TYPE_U8:
236 case TYPE_S8:
237 return 8;
238 default:
239 assert(0); /* invalid type */
240 return 0;
241 }
242 }
243
244 static inline int type_float(type_t type)
245 {
246 return (type == TYPE_F32) || (type == TYPE_F16);
247 }
248
249 static inline int type_uint(type_t type)
250 {
251 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
252 }
253
254 static inline int type_sint(type_t type)
255 {
256 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
257 }
258
259 typedef union PACKED {
260 /* normal gpr or const src register: */
261 struct PACKED {
262 uint32_t comp : 2;
263 uint32_t num : 10;
264 };
265 /* for immediate val: */
266 int32_t iim_val : 11;
267 /* to make compiler happy: */
268 uint32_t dummy32;
269 uint32_t dummy10 : 10;
270 uint32_t dummy11 : 11;
271 uint32_t dummy12 : 12;
272 uint32_t dummy13 : 13;
273 uint32_t dummy8 : 8;
274 } reg_t;
275
276 /* special registers: */
277 #define REG_A0 61 /* address register */
278 #define REG_P0 62 /* predicate register */
279
280 static inline int reg_special(reg_t reg)
281 {
282 return (reg.num == REG_A0) || (reg.num == REG_P0);
283 }
284
/* Bit layout of a category 0 instruction (two 32-bit words). */
typedef struct PACKED {
	/* dword0: */
	int16_t  immed   : 16;
	uint32_t dummy1  : 16;

	/* dword1: */
	uint32_t dummy2  : 8;
	uint32_t repeat  : 3;
	uint32_t dummy3  : 1;
	uint32_t ss      : 1;
	uint32_t dummy4  : 7;
	uint32_t inv     : 1;
	uint32_t comp    : 2;
	uint32_t opc     : 4;
	uint32_t jmp_tgt : 1;
	uint32_t sync    : 1;
	uint32_t opc_cat : 3;
} instr_cat0_t;
303
304 typedef struct PACKED {
305 /* dword0: */
306 union PACKED {
307 /* for normal src register: */
308 struct PACKED {
309 uint32_t src : 11;
310 /* at least low bit of pad must be zero or it will
311 * look like a address relative src
312 */
313 uint32_t pad : 21;
314 };
315 /* for address relative: */
316 struct PACKED {
317 int32_t off : 10;
318 uint32_t src_rel_c : 1;
319 uint32_t src_rel : 1;
320 uint32_t unknown : 20;
321 };
322 /* for immediate: */
323 int32_t iim_val;
324 float fim_val;
325 };
326
327 /* dword1: */
328 uint32_t dst : 8;
329 uint32_t repeat : 3;
330 uint32_t src_r : 1;
331 uint32_t ss : 1;
332 uint32_t ul : 1;
333 uint32_t dst_type : 3;
334 uint32_t dst_rel : 1;
335 uint32_t src_type : 3;
336 uint32_t src_c : 1;
337 uint32_t src_im : 1;
338 uint32_t even : 1;
339 uint32_t pos_inf : 1;
340 uint32_t must_be_0 : 2;
341 uint32_t jmp_tgt : 1;
342 uint32_t sync : 1;
343 uint32_t opc_cat : 3;
344 } instr_cat1_t;
345
346 typedef struct PACKED {
347 /* dword0: */
348 union PACKED {
349 struct PACKED {
350 uint32_t src1 : 11;
351 uint32_t must_be_zero1: 2;
352 uint32_t src1_im : 1; /* immediate */
353 uint32_t src1_neg : 1; /* negate */
354 uint32_t src1_abs : 1; /* absolute value */
355 };
356 struct PACKED {
357 uint32_t src1 : 10;
358 uint32_t src1_c : 1; /* relative-const */
359 uint32_t src1_rel : 1; /* relative address */
360 uint32_t must_be_zero : 1;
361 uint32_t dummy : 3;
362 } rel1;
363 struct PACKED {
364 uint32_t src1 : 12;
365 uint32_t src1_c : 1; /* const */
366 uint32_t dummy : 3;
367 } c1;
368 };
369
370 union PACKED {
371 struct PACKED {
372 uint32_t src2 : 11;
373 uint32_t must_be_zero2: 2;
374 uint32_t src2_im : 1; /* immediate */
375 uint32_t src2_neg : 1; /* negate */
376 uint32_t src2_abs : 1; /* absolute value */
377 };
378 struct PACKED {
379 uint32_t src2 : 10;
380 uint32_t src2_c : 1; /* relative-const */
381 uint32_t src2_rel : 1; /* relative address */
382 uint32_t must_be_zero : 1;
383 uint32_t dummy : 3;
384 } rel2;
385 struct PACKED {
386 uint32_t src2 : 12;
387 uint32_t src2_c : 1; /* const */
388 uint32_t dummy : 3;
389 } c2;
390 };
391
392 /* dword1: */
393 uint32_t dst : 8;
394 uint32_t repeat : 3;
395 uint32_t src1_r : 1;
396 uint32_t ss : 1;
397 uint32_t ul : 1; /* dunno */
398 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
399 uint32_t ei : 1;
400 uint32_t cond : 3;
401 uint32_t src2_r : 1;
402 uint32_t full : 1; /* not half */
403 uint32_t opc : 6;
404 uint32_t jmp_tgt : 1;
405 uint32_t sync : 1;
406 uint32_t opc_cat : 3;
407 } instr_cat2_t;
408
409 typedef struct PACKED {
410 /* dword0: */
411 union PACKED {
412 struct PACKED {
413 uint32_t src1 : 11;
414 uint32_t must_be_zero1: 2;
415 uint32_t src2_c : 1;
416 uint32_t src1_neg : 1;
417 uint32_t src2_r : 1;
418 };
419 struct PACKED {
420 uint32_t src1 : 10;
421 uint32_t src1_c : 1;
422 uint32_t src1_rel : 1;
423 uint32_t must_be_zero : 1;
424 uint32_t dummy : 3;
425 } rel1;
426 struct PACKED {
427 uint32_t src1 : 12;
428 uint32_t src1_c : 1;
429 uint32_t dummy : 3;
430 } c1;
431 };
432
433 union PACKED {
434 struct PACKED {
435 uint32_t src3 : 11;
436 uint32_t must_be_zero2: 2;
437 uint32_t src3_r : 1;
438 uint32_t src2_neg : 1;
439 uint32_t src3_neg : 1;
440 };
441 struct PACKED {
442 uint32_t src3 : 10;
443 uint32_t src3_c : 1;
444 uint32_t src3_rel : 1;
445 uint32_t must_be_zero : 1;
446 uint32_t dummy : 3;
447 } rel2;
448 struct PACKED {
449 uint32_t src3 : 12;
450 uint32_t src3_c : 1;
451 uint32_t dummy : 3;
452 } c2;
453 };
454
455 /* dword1: */
456 uint32_t dst : 8;
457 uint32_t repeat : 3;
458 uint32_t src1_r : 1;
459 uint32_t ss : 1;
460 uint32_t ul : 1;
461 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
462 uint32_t src2 : 8;
463 uint32_t opc : 4;
464 uint32_t jmp_tgt : 1;
465 uint32_t sync : 1;
466 uint32_t opc_cat : 3;
467 } instr_cat3_t;
468
469 static inline bool instr_cat3_full(instr_cat3_t *cat3)
470 {
471 switch (cat3->opc) {
472 case OPC_MAD_F16:
473 case OPC_MAD_U16:
474 case OPC_MAD_S16:
475 case OPC_SEL_B16:
476 case OPC_SEL_S16:
477 case OPC_SEL_F16:
478 case OPC_SAD_S16:
479 case OPC_SAD_S32: // really??
480 return false;
481 default:
482 return true;
483 }
484 }
485
486 typedef struct PACKED {
487 /* dword0: */
488 union PACKED {
489 struct PACKED {
490 uint32_t src : 11;
491 uint32_t must_be_zero1: 2;
492 uint32_t src_im : 1; /* immediate */
493 uint32_t src_neg : 1; /* negate */
494 uint32_t src_abs : 1; /* absolute value */
495 };
496 struct PACKED {
497 uint32_t src : 10;
498 uint32_t src_c : 1; /* relative-const */
499 uint32_t src_rel : 1; /* relative address */
500 uint32_t must_be_zero : 1;
501 uint32_t dummy : 3;
502 } rel;
503 struct PACKED {
504 uint32_t src : 12;
505 uint32_t src_c : 1; /* const */
506 uint32_t dummy : 3;
507 } c;
508 };
509 uint32_t dummy1 : 16; /* seem to be ignored */
510
511 /* dword1: */
512 uint32_t dst : 8;
513 uint32_t repeat : 3;
514 uint32_t src_r : 1;
515 uint32_t ss : 1;
516 uint32_t ul : 1;
517 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
518 uint32_t dummy2 : 5; /* seem to be ignored */
519 uint32_t full : 1; /* not half */
520 uint32_t opc : 6;
521 uint32_t jmp_tgt : 1;
522 uint32_t sync : 1;
523 uint32_t opc_cat : 3;
524 } instr_cat4_t;
525
526 typedef struct PACKED {
527 /* dword0: */
528 union PACKED {
529 /* normal case: */
530 struct PACKED {
531 uint32_t full : 1; /* not half */
532 uint32_t src1 : 8;
533 uint32_t src2 : 8;
534 uint32_t dummy1 : 4; /* seem to be ignored */
535 uint32_t samp : 4;
536 uint32_t tex : 7;
537 } norm;
538 /* s2en case: */
539 struct PACKED {
540 uint32_t full : 1; /* not half */
541 uint32_t src1 : 8;
542 uint32_t src2 : 11;
543 uint32_t dummy1 : 1;
544 uint32_t src3 : 8;
545 uint32_t dummy2 : 3;
546 } s2en;
547 /* same in either case: */
548 // XXX I think, confirm this
549 struct PACKED {
550 uint32_t full : 1; /* not half */
551 uint32_t src1 : 8;
552 uint32_t pad : 23;
553 };
554 };
555
556 /* dword1: */
557 uint32_t dst : 8;
558 uint32_t wrmask : 4; /* write-mask */
559 uint32_t type : 3;
560 uint32_t dummy2 : 1; /* seems to be ignored */
561 uint32_t is_3d : 1;
562
563 uint32_t is_a : 1;
564 uint32_t is_s : 1;
565 uint32_t is_s2en : 1;
566 uint32_t is_o : 1;
567 uint32_t is_p : 1;
568
569 uint32_t opc : 5;
570 uint32_t jmp_tgt : 1;
571 uint32_t sync : 1;
572 uint32_t opc_cat : 3;
573 } instr_cat5_t;
574
/* Category 6 layout used for load instructions (two 32-bit words). */
typedef struct PACKED {
	/* dword0: */
	uint32_t must_be_one1 : 1;
	int16_t  off          : 13;
	uint32_t src          : 8;
	uint32_t dummy1       : 1;
	uint32_t must_be_one2 : 1;
	int32_t  iim_val      : 8;

	/* dword1: */
	uint32_t dst          : 8;
	uint32_t dummy2       : 9;
	uint32_t type         : 3;
	uint32_t dummy3       : 2;
	uint32_t opc          : 5;
	uint32_t jmp_tgt      : 1;
	uint32_t sync         : 1;
	uint32_t opc_cat      : 3;
} instr_cat6a_t;
595
/*
 * Category 6 layout used for store instructions (two 32-bit words).
 * The offset is split: 'off' in dword1 and its high bits ('off_hi') in
 * dword0.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t must_be_zero1 : 1;
	uint32_t src           : 8;
	uint32_t off_hi        : 5;   /* high bits of 'off'... ugly! */
	uint32_t dummy1        : 9;
	uint32_t must_be_one1  : 1;
	int32_t  iim_val       : 8;

	/* dword1: */
	uint16_t off           : 8;
	uint32_t must_be_one2  : 1;
	uint32_t dst           : 8;
	uint32_t type          : 3;
	uint32_t dummy2        : 2;
	uint32_t opc           : 5;
	uint32_t jmp_tgt       : 1;
	uint32_t sync          : 1;
	uint32_t opc_cat       : 3;
} instr_cat6b_t;
617
618 typedef union PACKED {
619 instr_cat6a_t a;
620 instr_cat6b_t b;
621 struct PACKED {
622 /* dword0: */
623 uint32_t pad1 : 24;
624 int32_t iim_val : 8;
625
626 /* dword1: */
627 uint32_t pad2 : 17;
628 uint32_t type : 3;
629 uint32_t pad3 : 2;
630 uint32_t opc : 5;
631 uint32_t jmp_tgt : 1;
632 uint32_t sync : 1;
633 uint32_t opc_cat : 3;
634 };
635 } instr_cat6_t;
636
637 typedef union PACKED {
638 instr_cat0_t cat0;
639 instr_cat1_t cat1;
640 instr_cat2_t cat2;
641 instr_cat3_t cat3;
642 instr_cat4_t cat4;
643 instr_cat5_t cat5;
644 instr_cat6_t cat6;
645 struct PACKED {
646 /* dword0: */
647 uint64_t pad1 : 40;
648 uint32_t repeat : 3; /* cat0-cat4 */
649 uint32_t pad2 : 1;
650 uint32_t ss : 1; /* cat1-cat4 (cat0??) */
651 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
652 uint32_t pad3 : 13;
653 uint32_t jmp_tgt : 1;
654 uint32_t sync : 1;
655 uint32_t opc_cat : 3;
656
657 };
658 } instr_t;
659
660 static inline uint32_t instr_opc(instr_t *instr)
661 {
662 switch (instr->opc_cat) {
663 case 0: return instr->cat0.opc;
664 case 1: return 0;
665 case 2: return instr->cat2.opc;
666 case 3: return instr->cat3.opc;
667 case 4: return instr->cat4.opc;
668 case 5: return instr->cat5.opc;
669 case 6: return instr->cat6.opc;
670 default: return 0;
671 }
672 }
673
674 static inline bool is_mad(opc_t opc)
675 {
676 switch (opc) {
677 case OPC_MAD_U16:
678 case OPC_MADSH_U16:
679 case OPC_MAD_S16:
680 case OPC_MADSH_M16:
681 case OPC_MAD_U24:
682 case OPC_MAD_S24:
683 case OPC_MAD_F16:
684 case OPC_MAD_F32:
685 return true;
686 default:
687 return false;
688 }
689 }
690
691 #endif /* INSTR_A3XX_H_ */