freedreno: enabling binning and opt by default
[mesa.git] / src / gallium / drivers / freedreno / a3xx / instr-a3xx.h
/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
31
/*
 * Opcode numbers, grouped by instruction category.
 *
 * The numeric values restart in every category, so many enumerators share
 * the same value.  An opc_t value therefore only identifies an instruction
 * when it is paired with the category it was taken from.
 */
typedef enum {
	/* category 0: */
	OPC_NOP              = 0,
	OPC_BR               = 1,
	OPC_JUMP             = 2,
	OPC_CALL             = 3,
	OPC_RET              = 4,
	OPC_KILL             = 5,
	OPC_END              = 6,
	OPC_EMIT             = 7,
	OPC_CUT              = 8,
	OPC_CHMASK           = 9,
	OPC_CHSH             = 10,
	OPC_FLOW_REV         = 11,

	/* category 1: */
	/* no opc.. all category 1 are variants of mov */

	/* category 2: */
	OPC_ADD_F            = 0,
	OPC_MIN_F            = 1,
	OPC_MAX_F            = 2,
	OPC_MUL_F            = 3,
	OPC_SIGN_F           = 4,
	OPC_CMPS_F           = 5,
	OPC_ABSNEG_F         = 6,
	OPC_CMPV_F           = 7,
	/* 8 - invalid */
	OPC_FLOOR_F          = 9,
	OPC_CEIL_F           = 10,
	OPC_RNDNE_F          = 11,
	OPC_RNDAZ_F          = 12,
	OPC_TRUNC_F          = 13,
	/* 14-15 - invalid */
	OPC_ADD_U            = 16,
	OPC_ADD_S            = 17,
	OPC_SUB_U            = 18,
	OPC_SUB_S            = 19,
	OPC_CMPS_U           = 20,
	OPC_CMPS_S           = 21,
	OPC_MIN_U            = 22,
	OPC_MIN_S            = 23,
	OPC_MAX_U            = 24,
	OPC_MAX_S            = 25,
	OPC_ABSNEG_S         = 26,
	/* 27 - invalid */
	OPC_AND_B            = 28,
	OPC_OR_B             = 29,
	OPC_NOT_B            = 30,
	OPC_XOR_B            = 31,
	/* 32 - invalid */
	OPC_CMPV_U           = 33,
	OPC_CMPV_S           = 34,
	/* 35-47 - invalid */
	OPC_MUL_U            = 48,
	OPC_MUL_S            = 49,
	OPC_MULL_U           = 50,
	OPC_BFREV_B          = 51,
	OPC_CLZ_S            = 52,
	OPC_CLZ_B            = 53,
	OPC_SHL_B            = 54,
	OPC_SHR_B            = 55,
	OPC_ASHR_B           = 56,
	OPC_BARY_F           = 57,
	OPC_MGEN_B           = 58,
	OPC_GETBIT_B         = 59,
	OPC_SETRM            = 60,
	OPC_CBITS_B          = 61,
	OPC_SHB              = 62,
	OPC_MSAD             = 63,

	/* category 3: */
	OPC_MAD_U16          = 0,
	OPC_MADSH_U16        = 1,
	OPC_MAD_S16          = 2,
	OPC_MADSH_M16        = 3,   /* should this be .s16? */
	OPC_MAD_U24          = 4,
	OPC_MAD_S24          = 5,
	OPC_MAD_F16          = 6,
	OPC_MAD_F32          = 7,
	OPC_SEL_B16          = 8,
	OPC_SEL_B32          = 9,
	OPC_SEL_S16          = 10,
	OPC_SEL_S32          = 11,
	OPC_SEL_F16          = 12,
	OPC_SEL_F32          = 13,
	OPC_SAD_S16          = 14,
	OPC_SAD_S32          = 15,

	/* category 4: */
	OPC_RCP              = 0,
	OPC_RSQ              = 1,
	OPC_LOG2             = 2,
	OPC_EXP2             = 3,
	OPC_SIN              = 4,
	OPC_COS              = 5,
	OPC_SQRT             = 6,
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM             = 0,
	OPC_ISAML            = 1,
	OPC_ISAMM            = 2,
	OPC_SAM              = 3,
	OPC_SAMB             = 4,
	OPC_SAML             = 5,
	OPC_SAMGQ            = 6,
	OPC_GETLOD           = 7,
	OPC_CONV             = 8,
	OPC_CONVM            = 9,
	OPC_GETSIZE          = 10,
	OPC_GETBUF           = 11,
	OPC_GETPOS           = 12,
	OPC_GETINFO          = 13,
	OPC_DSX              = 14,
	OPC_DSY              = 15,
	OPC_GATHER4R         = 16,
	OPC_GATHER4G         = 17,
	OPC_GATHER4B         = 18,
	OPC_GATHER4A         = 19,
	OPC_SAMGP0           = 20,
	OPC_SAMGP1           = 21,
	OPC_SAMGP2           = 22,
	OPC_SAMGP3           = 23,
	OPC_DSXPP_1          = 24,
	OPC_DSYPP_1          = 25,
	OPC_RGETPOS          = 26,
	OPC_RGETINFO         = 27,

	/* category 6: */
	OPC_LDG              = 0,   /* load-global */
	OPC_LDL              = 1,
	OPC_LDP              = 2,
	OPC_STG              = 3,   /* store-global */
	OPC_STL              = 4,
	OPC_STP              = 5,
	OPC_STI              = 6,
	OPC_G2L              = 7,
	OPC_L2G              = 8,
	OPC_PREFETCH         = 9,
	OPC_LDLW             = 10,
	OPC_STLW             = 11,
	OPC_RESFMT           = 14,
	OPC_RESINFO          = 15,
	OPC_ATOMIC_ADD_L     = 16,
	OPC_ATOMIC_SUB_L     = 17,
	OPC_ATOMIC_XCHG_L    = 18,
	OPC_ATOMIC_INC_L     = 19,
	OPC_ATOMIC_DEC_L     = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L     = 22,
	OPC_ATOMIC_MAX_L     = 23,
	OPC_ATOMIC_AND_L     = 24,
	OPC_ATOMIC_OR_L      = 25,
	OPC_ATOMIC_XOR_L     = 26,
	OPC_LDGB_TYPED_4D    = 27,
	OPC_STGB_4D_4        = 28,
	OPC_STIB             = 29,
	OPC_LDC_4            = 30,
	OPC_LDLV             = 31,

	/* meta instructions (category -1): */
	/* placeholder instr to mark inputs/outputs: */
	OPC_META_INPUT       = 0,
	OPC_META_OUTPUT      = 1,
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO          = 2,
	OPC_META_FI          = 3,
	/* branches/flow control */
	OPC_META_FLOW        = 4,
	OPC_META_PHI         = 5,
} opc_t;
210
/*
 * Data-type codes.  Each is a float (F), unsigned (U) or signed (S) prefix
 * followed by a width; the values run contiguously from 0 to 7.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,   /* XXX I assume? */
} type_t;
221
222 static inline uint32_t type_size(type_t type)
223 {
224 switch (type) {
225 case TYPE_F32:
226 case TYPE_U32:
227 case TYPE_S32:
228 return 32;
229 case TYPE_F16:
230 case TYPE_U16:
231 case TYPE_S16:
232 return 16;
233 case TYPE_U8:
234 case TYPE_S8:
235 return 8;
236 default:
237 assert(0); /* invalid type */
238 return 0;
239 }
240 }
241
242 static inline int type_float(type_t type)
243 {
244 return (type == TYPE_F32) || (type == TYPE_F16);
245 }
246
247 typedef union PACKED {
248 /* normal gpr or const src register: */
249 struct PACKED {
250 uint32_t comp : 2;
251 uint32_t num : 10;
252 };
253 /* for immediate val: */
254 int32_t iim_val : 11;
255 /* to make compiler happy: */
256 uint32_t dummy32;
257 uint32_t dummy10 : 10;
258 uint32_t dummy11 : 11;
259 uint32_t dummy12 : 12;
260 uint32_t dummy13 : 13;
261 uint32_t dummy8 : 8;
262 } reg_t;
263
264 /* special registers: */
265 #define REG_A0 61 /* address register */
266 #define REG_P0 62 /* predicate register */
267
268 static inline int reg_special(reg_t reg)
269 {
270 return (reg.num == REG_A0) || (reg.num == REG_P0);
271 }
272
/*
 * Bit layout of a category 0 instruction: two 32-bit dwords, with the
 * opcode and category selector in the top bits of dword1.
 */
typedef struct PACKED {
	/* dword0: */
	int16_t  immed   : 16;
	uint32_t dummy1  : 16;

	/* dword1: */
	uint32_t dummy2  : 8;
	uint32_t repeat  : 3;
	uint32_t dummy3  : 1;
	uint32_t ss      : 1;
	uint32_t dummy4  : 7;
	uint32_t inv     : 1;
	uint32_t comp    : 2;
	uint32_t opc     : 4;
	uint32_t jmp_tgt : 1;
	uint32_t sync    : 1;
	uint32_t opc_cat : 3;
} instr_cat0_t;
291
292 typedef struct PACKED {
293 /* dword0: */
294 union PACKED {
295 /* for normal src register: */
296 struct PACKED {
297 uint32_t src : 11;
298 /* at least low bit of pad must be zero or it will
299 * look like a address relative src
300 */
301 uint32_t pad : 21;
302 };
303 /* for address relative: */
304 struct PACKED {
305 int32_t off : 10;
306 uint32_t src_rel_c : 1;
307 uint32_t src_rel : 1;
308 uint32_t unknown : 20;
309 };
310 /* for immediate: */
311 int32_t iim_val;
312 float fim_val;
313 };
314
315 /* dword1: */
316 uint32_t dst : 8;
317 uint32_t repeat : 3;
318 uint32_t src_r : 1;
319 uint32_t ss : 1;
320 uint32_t ul : 1;
321 uint32_t dst_type : 3;
322 uint32_t dst_rel : 1;
323 uint32_t src_type : 3;
324 uint32_t src_c : 1;
325 uint32_t src_im : 1;
326 uint32_t even : 1;
327 uint32_t pos_inf : 1;
328 uint32_t must_be_0 : 2;
329 uint32_t jmp_tgt : 1;
330 uint32_t sync : 1;
331 uint32_t opc_cat : 3;
332 } instr_cat1_t;
333
334 typedef struct PACKED {
335 /* dword0: */
336 union PACKED {
337 struct PACKED {
338 uint32_t src1 : 11;
339 uint32_t must_be_zero1: 2;
340 uint32_t src1_im : 1; /* immediate */
341 uint32_t src1_neg : 1; /* negate */
342 uint32_t src1_abs : 1; /* absolute value */
343 };
344 struct PACKED {
345 uint32_t src1 : 10;
346 uint32_t src1_c : 1; /* relative-const */
347 uint32_t src1_rel : 1; /* relative address */
348 uint32_t must_be_zero : 1;
349 uint32_t dummy : 3;
350 } rel1;
351 struct PACKED {
352 uint32_t src1 : 12;
353 uint32_t src1_c : 1; /* const */
354 uint32_t dummy : 3;
355 } c1;
356 };
357
358 union PACKED {
359 struct PACKED {
360 uint32_t src2 : 11;
361 uint32_t must_be_zero2: 2;
362 uint32_t src2_im : 1; /* immediate */
363 uint32_t src2_neg : 1; /* negate */
364 uint32_t src2_abs : 1; /* absolute value */
365 };
366 struct PACKED {
367 uint32_t src2 : 10;
368 uint32_t src2_c : 1; /* relative-const */
369 uint32_t src2_rel : 1; /* relative address */
370 uint32_t must_be_zero : 1;
371 uint32_t dummy : 3;
372 } rel2;
373 struct PACKED {
374 uint32_t src2 : 12;
375 uint32_t src2_c : 1; /* const */
376 uint32_t dummy : 3;
377 } c2;
378 };
379
380 /* dword1: */
381 uint32_t dst : 8;
382 uint32_t repeat : 3;
383 uint32_t src1_r : 1;
384 uint32_t ss : 1;
385 uint32_t ul : 1; /* dunno */
386 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
387 uint32_t ei : 1;
388 uint32_t cond : 3;
389 uint32_t src2_r : 1;
390 uint32_t full : 1; /* not half */
391 uint32_t opc : 6;
392 uint32_t jmp_tgt : 1;
393 uint32_t sync : 1;
394 uint32_t opc_cat : 3;
395 } instr_cat2_t;
396
397 typedef struct PACKED {
398 /* dword0: */
399 union PACKED {
400 struct PACKED {
401 uint32_t src1 : 11;
402 uint32_t must_be_zero1: 2;
403 uint32_t src2_c : 1;
404 uint32_t src1_neg : 1;
405 uint32_t src2_r : 1;
406 };
407 struct PACKED {
408 uint32_t src1 : 10;
409 uint32_t src1_c : 1;
410 uint32_t src1_rel : 1;
411 uint32_t must_be_zero : 1;
412 uint32_t dummy : 3;
413 } rel1;
414 struct PACKED {
415 uint32_t src1 : 12;
416 uint32_t src1_c : 1;
417 uint32_t dummy : 3;
418 } c1;
419 };
420
421 union PACKED {
422 struct PACKED {
423 uint32_t src3 : 11;
424 uint32_t must_be_zero2: 2;
425 uint32_t src3_r : 1;
426 uint32_t src2_neg : 1;
427 uint32_t src3_neg : 1;
428 };
429 struct PACKED {
430 uint32_t src3 : 10;
431 uint32_t src3_c : 1;
432 uint32_t src3_rel : 1;
433 uint32_t must_be_zero : 1;
434 uint32_t dummy : 3;
435 } rel2;
436 struct PACKED {
437 uint32_t src3 : 12;
438 uint32_t src3_c : 1;
439 uint32_t dummy : 3;
440 } c2;
441 };
442
443 /* dword1: */
444 uint32_t dst : 8;
445 uint32_t repeat : 3;
446 uint32_t src1_r : 1;
447 uint32_t ss : 1;
448 uint32_t ul : 1;
449 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
450 uint32_t src2 : 8;
451 uint32_t opc : 4;
452 uint32_t jmp_tgt : 1;
453 uint32_t sync : 1;
454 uint32_t opc_cat : 3;
455 } instr_cat3_t;
456
457 static inline bool instr_cat3_full(instr_cat3_t *cat3)
458 {
459 switch (cat3->opc) {
460 case OPC_MAD_F16:
461 case OPC_MAD_U16:
462 case OPC_MAD_S16:
463 case OPC_SEL_B16:
464 case OPC_SEL_S16:
465 case OPC_SEL_F16:
466 case OPC_SAD_S16:
467 case OPC_SAD_S32: // really??
468 return false;
469 default:
470 return true;
471 }
472 }
473
474 typedef struct PACKED {
475 /* dword0: */
476 union PACKED {
477 struct PACKED {
478 uint32_t src : 11;
479 uint32_t must_be_zero1: 2;
480 uint32_t src_im : 1; /* immediate */
481 uint32_t src_neg : 1; /* negate */
482 uint32_t src_abs : 1; /* absolute value */
483 };
484 struct PACKED {
485 uint32_t src : 10;
486 uint32_t src_c : 1; /* relative-const */
487 uint32_t src_rel : 1; /* relative address */
488 uint32_t must_be_zero : 1;
489 uint32_t dummy : 3;
490 } rel;
491 struct PACKED {
492 uint32_t src : 12;
493 uint32_t src_c : 1; /* const */
494 uint32_t dummy : 3;
495 } c;
496 };
497 uint32_t dummy1 : 16; /* seem to be ignored */
498
499 /* dword1: */
500 uint32_t dst : 8;
501 uint32_t repeat : 3;
502 uint32_t src_r : 1;
503 uint32_t ss : 1;
504 uint32_t ul : 1;
505 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
506 uint32_t dummy2 : 5; /* seem to be ignored */
507 uint32_t full : 1; /* not half */
508 uint32_t opc : 6;
509 uint32_t jmp_tgt : 1;
510 uint32_t sync : 1;
511 uint32_t opc_cat : 3;
512 } instr_cat4_t;
513
514 typedef struct PACKED {
515 /* dword0: */
516 union PACKED {
517 /* normal case: */
518 struct PACKED {
519 uint32_t full : 1; /* not half */
520 uint32_t src1 : 8;
521 uint32_t src2 : 8;
522 uint32_t dummy1 : 4; /* seem to be ignored */
523 uint32_t samp : 4;
524 uint32_t tex : 7;
525 } norm;
526 /* s2en case: */
527 struct PACKED {
528 uint32_t full : 1; /* not half */
529 uint32_t src1 : 8;
530 uint32_t src2 : 11;
531 uint32_t dummy1 : 1;
532 uint32_t src3 : 8;
533 uint32_t dummy2 : 3;
534 } s2en;
535 /* same in either case: */
536 // XXX I think, confirm this
537 struct PACKED {
538 uint32_t full : 1; /* not half */
539 uint32_t src1 : 8;
540 uint32_t pad : 23;
541 };
542 };
543
544 /* dword1: */
545 uint32_t dst : 8;
546 uint32_t wrmask : 4; /* write-mask */
547 uint32_t type : 3;
548 uint32_t dummy2 : 1; /* seems to be ignored */
549 uint32_t is_3d : 1;
550
551 uint32_t is_a : 1;
552 uint32_t is_s : 1;
553 uint32_t is_s2en : 1;
554 uint32_t is_o : 1;
555 uint32_t is_p : 1;
556
557 uint32_t opc : 5;
558 uint32_t jmp_tgt : 1;
559 uint32_t sync : 1;
560 uint32_t opc_cat : 3;
561 } instr_cat5_t;
562
/*
 * Category 6 layout, variant "a" -- used for load instructions.
 * The signed offset is a single 13-bit field in dword0.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t must_be_one1 : 1;
	int16_t  off          : 13;
	uint32_t src          : 8;
	uint32_t dummy1       : 1;
	uint32_t must_be_one2 : 1;
	int32_t  iim_val      : 8;

	/* dword1: */
	uint32_t dst          : 8;
	uint32_t dummy2       : 9;
	uint32_t type         : 3;
	uint32_t dummy3       : 2;
	uint32_t opc          : 5;
	uint32_t jmp_tgt      : 1;
	uint32_t sync         : 1;
	uint32_t opc_cat      : 3;
} instr_cat6a_t;
583
/*
 * Category 6 layout, variant "b" -- used for store instructions.
 * Here the offset is split: 'off' (dword1) holds the low 8 bits and
 * 'off_hi' (dword0) the high bits.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t must_be_zero1 : 1;
	uint32_t src           : 8;
	uint32_t off_hi        : 5;   /* high bits of 'off'... ugly! */
	uint32_t dummy1        : 9;
	uint32_t must_be_one1  : 1;
	int32_t  iim_val       : 8;

	/* dword1: */
	uint16_t off           : 8;
	uint32_t must_be_one2  : 1;
	uint32_t dst           : 8;
	uint32_t type          : 3;
	uint32_t dummy2        : 2;
	uint32_t opc           : 5;
	uint32_t jmp_tgt       : 1;
	uint32_t sync          : 1;
	uint32_t opc_cat       : 3;
} instr_cat6b_t;
605
606 typedef union PACKED {
607 instr_cat6a_t a;
608 instr_cat6b_t b;
609 struct PACKED {
610 /* dword0: */
611 uint32_t pad1 : 24;
612 int32_t iim_val : 8;
613
614 /* dword1: */
615 uint32_t pad2 : 17;
616 uint32_t type : 3;
617 uint32_t pad3 : 2;
618 uint32_t opc : 5;
619 uint32_t jmp_tgt : 1;
620 uint32_t sync : 1;
621 uint32_t opc_cat : 3;
622 };
623 } instr_cat6_t;
624
625 typedef union PACKED {
626 instr_cat0_t cat0;
627 instr_cat1_t cat1;
628 instr_cat2_t cat2;
629 instr_cat3_t cat3;
630 instr_cat4_t cat4;
631 instr_cat5_t cat5;
632 instr_cat6_t cat6;
633 struct PACKED {
634 /* dword0: */
635 uint64_t pad1 : 40;
636 uint32_t repeat : 3; /* cat0-cat4 */
637 uint32_t pad2 : 1;
638 uint32_t ss : 1; /* cat1-cat4 (cat0??) */
639 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
640 uint32_t pad3 : 13;
641 uint32_t jmp_tgt : 1;
642 uint32_t sync : 1;
643 uint32_t opc_cat : 3;
644
645 };
646 } instr_t;
647
648 static inline uint32_t instr_opc(instr_t *instr)
649 {
650 switch (instr->opc_cat) {
651 case 0: return instr->cat0.opc;
652 case 1: return 0;
653 case 2: return instr->cat2.opc;
654 case 3: return instr->cat3.opc;
655 case 4: return instr->cat4.opc;
656 case 5: return instr->cat5.opc;
657 case 6: return instr->cat6.opc;
658 default: return 0;
659 }
660 }
661
662 static inline bool is_mad(opc_t opc)
663 {
664 switch (opc) {
665 case OPC_MAD_U16:
666 case OPC_MADSH_U16:
667 case OPC_MAD_S16:
668 case OPC_MADSH_M16:
669 case OPC_MAD_U24:
670 case OPC_MAD_S24:
671 case OPC_MAD_F16:
672 case OPC_MAD_F32:
673 return true;
674 default:
675 return false;
676 }
677 }
678
679 #endif /* INSTR_A3XX_H_ */