src/freedreno/ir3/instr-a3xx.h

   1 /*
   2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23
  24 #ifndef INSTR_A3XX_H_
  25 #define INSTR_A3XX_H_
  26
  27 #define PACKED __attribute__((__packed__))   /* applied to every encoding struct/union in this header; requests a padding-free layout (GCC/Clang attribute) */
  28
  29 #include <stdint.h>
  30 #include <stdio.h>
  31 #include <stdbool.h>
  32 #include <assert.h>
  33
  34 /* size of largest OPC field of all the instruction categories: */
  35 #define NOPC_BITS 6
  36
  37 #define _OPC(cat, opc)   (((cat) << NOPC_BITS) | opc)
  38
  39 typedef enum {   /* every opcode, encoded as _OPC(category, opcode within category) */
  40         /* category 0: */
  41         OPC_NOP             = _OPC(0, 0),
  42         OPC_BR              = _OPC(0, 1),
  43         OPC_JUMP            = _OPC(0, 2),
  44         OPC_CALL            = _OPC(0, 3),
  45         OPC_RET             = _OPC(0, 4),
  46         OPC_KILL            = _OPC(0, 5),
  47         OPC_END             = _OPC(0, 6),
  48         OPC_EMIT            = _OPC(0, 7),
  49         OPC_CUT             = _OPC(0, 8),
  50         OPC_CHMASK          = _OPC(0, 9),
  51         OPC_CHSH            = _OPC(0, 10),
  52         OPC_FLOW_REV        = _OPC(0, 11),
  53         /* 12 - not defined here */
  54         OPC_IF              = _OPC(0, 13),
  55         OPC_ELSE            = _OPC(0, 14),
  56         OPC_ENDIF           = _OPC(0, 15),
  57
  58         /* category 1: */
  59         OPC_MOV             = _OPC(1, 0),
  60
  61         /* category 2: */
  62         OPC_ADD_F           = _OPC(2, 0),
  63         OPC_MIN_F           = _OPC(2, 1),
  64         OPC_MAX_F           = _OPC(2, 2),
  65         OPC_MUL_F           = _OPC(2, 3),
  66         OPC_SIGN_F          = _OPC(2, 4),
  67         OPC_CMPS_F          = _OPC(2, 5),
  68         OPC_ABSNEG_F        = _OPC(2, 6),
  69         OPC_CMPV_F          = _OPC(2, 7),
  70         /* 8 - invalid */
  71         OPC_FLOOR_F         = _OPC(2, 9),
  72         OPC_CEIL_F          = _OPC(2, 10),
  73         OPC_RNDNE_F         = _OPC(2, 11),
  74         OPC_RNDAZ_F         = _OPC(2, 12),
  75         OPC_TRUNC_F         = _OPC(2, 13),
  76         /* 14-15 - invalid */
  77         OPC_ADD_U           = _OPC(2, 16),
  78         OPC_ADD_S           = _OPC(2, 17),
  79         OPC_SUB_U           = _OPC(2, 18),
  80         OPC_SUB_S           = _OPC(2, 19),
  81         OPC_CMPS_U          = _OPC(2, 20),
  82         OPC_CMPS_S          = _OPC(2, 21),
  83         OPC_MIN_U           = _OPC(2, 22),
  84         OPC_MIN_S           = _OPC(2, 23),
  85         OPC_MAX_U           = _OPC(2, 24),
  86         OPC_MAX_S           = _OPC(2, 25),
  87         OPC_ABSNEG_S        = _OPC(2, 26),
  88         /* 27 - invalid */
  89         OPC_AND_B           = _OPC(2, 28),
  90         OPC_OR_B            = _OPC(2, 29),
  91         OPC_NOT_B           = _OPC(2, 30),
  92         OPC_XOR_B           = _OPC(2, 31),
  93         /* 32 - invalid */
  94         OPC_CMPV_U          = _OPC(2, 33),
  95         OPC_CMPV_S          = _OPC(2, 34),
  96         /* 35-47 - invalid */
  97         OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
  98         OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
  99         OPC_MULL_U          = _OPC(2, 50),
 100         OPC_BFREV_B         = _OPC(2, 51),
 101         OPC_CLZ_S           = _OPC(2, 52),
 102         OPC_CLZ_B           = _OPC(2, 53),
 103         OPC_SHL_B           = _OPC(2, 54),
 104         OPC_SHR_B           = _OPC(2, 55),
 105         OPC_ASHR_B          = _OPC(2, 56),
 106         OPC_BARY_F          = _OPC(2, 57),
 107         OPC_MGEN_B          = _OPC(2, 58),
 108         OPC_GETBIT_B        = _OPC(2, 59),
 109         OPC_SETRM           = _OPC(2, 60),
 110         OPC_CBITS_B         = _OPC(2, 61),
 111         OPC_SHB             = _OPC(2, 62),
 112         OPC_MSAD            = _OPC(2, 63),   /* 63 is the largest value that fits in NOPC_BITS */
 113
 114         /* category 3: */
 115         OPC_MAD_U16         = _OPC(3, 0),
 116         OPC_MADSH_U16       = _OPC(3, 1),
 117         OPC_MAD_S16         = _OPC(3, 2),
 118         OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
 119         OPC_MAD_U24         = _OPC(3, 4),
 120         OPC_MAD_S24         = _OPC(3, 5),
 121         OPC_MAD_F16         = _OPC(3, 6),
 122         OPC_MAD_F32         = _OPC(3, 7),
 123         OPC_SEL_B16         = _OPC(3, 8),
 124         OPC_SEL_B32         = _OPC(3, 9),
 125         OPC_SEL_S16         = _OPC(3, 10),
 126         OPC_SEL_S32         = _OPC(3, 11),
 127         OPC_SEL_F16         = _OPC(3, 12),
 128         OPC_SEL_F32         = _OPC(3, 13),
 129         OPC_SAD_S16         = _OPC(3, 14),
 130         OPC_SAD_S32         = _OPC(3, 15),
 131
 132         /* category 4: */
 133         OPC_RCP             = _OPC(4, 0),
 134         OPC_RSQ             = _OPC(4, 1),
 135         OPC_LOG2            = _OPC(4, 2),
 136         OPC_EXP2            = _OPC(4, 3),
 137         OPC_SIN             = _OPC(4, 4),
 138         OPC_COS             = _OPC(4, 5),
 139         OPC_SQRT            = _OPC(4, 6),
 140         /* NOTE that these are 8+opc from their highp equivalents, so it is
 141          * possible that the high order bit in the opc field has been repurposed
 142          * for half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
 143          * still use the same opc as highp.
 144          */
 145         OPC_HRSQ            = _OPC(4, 9),
 146         OPC_HLOG2           = _OPC(4, 10),
 147         OPC_HEXP2           = _OPC(4, 11),
 148
 149         /* category 5: */
 150         OPC_ISAM            = _OPC(5, 0),
 151         OPC_ISAML           = _OPC(5, 1),
 152         OPC_ISAMM           = _OPC(5, 2),
 153         OPC_SAM             = _OPC(5, 3),
 154         OPC_SAMB            = _OPC(5, 4),
 155         OPC_SAML            = _OPC(5, 5),
 156         OPC_SAMGQ           = _OPC(5, 6),
 157         OPC_GETLOD          = _OPC(5, 7),
 158         OPC_CONV            = _OPC(5, 8),
 159         OPC_CONVM           = _OPC(5, 9),
 160         OPC_GETSIZE         = _OPC(5, 10),
 161         OPC_GETBUF          = _OPC(5, 11),
 162         OPC_GETPOS          = _OPC(5, 12),
 163         OPC_GETINFO         = _OPC(5, 13),
 164         OPC_DSX             = _OPC(5, 14),
 165         OPC_DSY             = _OPC(5, 15),
 166         OPC_GATHER4R        = _OPC(5, 16),
 167         OPC_GATHER4G        = _OPC(5, 17),
 168         OPC_GATHER4B        = _OPC(5, 18),
 169         OPC_GATHER4A        = _OPC(5, 19),
 170         OPC_SAMGP0          = _OPC(5, 20),
 171         OPC_SAMGP1          = _OPC(5, 21),
 172         OPC_SAMGP2          = _OPC(5, 22),
 173         OPC_SAMGP3          = _OPC(5, 23),
 174         OPC_DSXPP_1         = _OPC(5, 24),
 175         OPC_DSYPP_1         = _OPC(5, 25),
 176         OPC_RGETPOS         = _OPC(5, 26),
 177         OPC_RGETINFO        = _OPC(5, 27),
 178
 179         /* category 6: */
 180         OPC_LDG             = _OPC(6, 0),        /* load-global */
 181         OPC_LDL             = _OPC(6, 1),
 182         OPC_LDP             = _OPC(6, 2),
 183         OPC_STG             = _OPC(6, 3),        /* store-global */
 184         OPC_STL             = _OPC(6, 4),
 185         OPC_STP             = _OPC(6, 5),
 186         OPC_LDIB            = _OPC(6, 6),
 187         OPC_G2L             = _OPC(6, 7),
 188         OPC_L2G             = _OPC(6, 8),
 189         OPC_PREFETCH        = _OPC(6, 9),
 190         OPC_LDLW            = _OPC(6, 10),
 191         OPC_STLW            = _OPC(6, 11),
 192         OPC_RESFMT          = _OPC(6, 14),       /* 12-13 - not defined here */
 193         OPC_RESINFO         = _OPC(6, 15),
 194         OPC_ATOMIC_ADD      = _OPC(6, 16),
 195         OPC_ATOMIC_SUB      = _OPC(6, 17),
 196         OPC_ATOMIC_XCHG     = _OPC(6, 18),
 197         OPC_ATOMIC_INC      = _OPC(6, 19),
 198         OPC_ATOMIC_DEC      = _OPC(6, 20),
 199         OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
 200         OPC_ATOMIC_MIN      = _OPC(6, 22),
 201         OPC_ATOMIC_MAX      = _OPC(6, 23),
 202         OPC_ATOMIC_AND      = _OPC(6, 24),
 203         OPC_ATOMIC_OR       = _OPC(6, 25),
 204         OPC_ATOMIC_XOR      = _OPC(6, 26),
 205         OPC_LDGB            = _OPC(6, 27),
 206         OPC_STGB            = _OPC(6, 28),
 207         OPC_STIB            = _OPC(6, 29),
 208         OPC_LDC             = _OPC(6, 30),
 209         OPC_LDLV            = _OPC(6, 31),
 210
 211         /* category 7: */
 212         OPC_BAR             = _OPC(7, 0),
 213         OPC_FENCE           = _OPC(7, 1),
 214
 215         /* meta instructions (category -1, so their enum values are negative): */
 216         /* placeholder instr to mark shader inputs: */
 217         OPC_META_INPUT      = _OPC(-1, 0),
 218         /* The "collect" and "split" instructions are used for keeping
 219          * track of instructions that write to multiple dst registers
 220          * (split) like texture sample instructions, or read multiple
 221          * consecutive scalar registers (collect) (bary.f, texture samp)
 222          *
 223          * A "split" extracts a scalar component from a vecN, and a
 224          * "collect" gathers multiple scalar components into a vecN
 225          */
 226         OPC_META_SPLIT      = _OPC(-1, 2),
 227         OPC_META_COLLECT    = _OPC(-1, 3),
 228
 229         /* placeholder for texture fetches that run before FS invocation
 230          * starts:
 231          */
 232         OPC_META_TEX_PREFETCH = _OPC(-1, 4),
 233
 234 } opc_t;
 235
 236 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))   /* category of an opc_t; for the negative meta opcodes this relies on >> being an arithmetic shift */
 237 #define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))   /* opcode within its category: the low NOPC_BITS bits */
 238
 239 typedef enum {   /* operand data types; values span 0..7 and type_size() maps each to its bit width */
 240         TYPE_F16 = 0,
 241         TYPE_F32 = 1,
 242         TYPE_U16 = 2,
 243         TYPE_U32 = 3,
 244         TYPE_S16 = 4,
 245         TYPE_S32 = 5,
 246         TYPE_U8  = 6,
 247         TYPE_S8  = 7,  // XXX I assume? (value 7 is unconfirmed)
 248 } type_t;
 249
 250 static inline uint32_t type_size(type_t type)
 251 {
 252         switch (type) {
 253         case TYPE_F32:
 254         case TYPE_U32:
 255         case TYPE_S32:
 256                 return 32;
 257         case TYPE_F16:
 258         case TYPE_U16:
 259         case TYPE_S16:
 260                 return 16;
 261         case TYPE_U8:
 262         case TYPE_S8:
 263                 return 8;
 264         default:
 265                 assert(0); /* invalid type */
 266                 return 0;
 267         }
 268 }
 269
 270 static inline int type_float(type_t type)
 271 {
 272         return (type == TYPE_F32) || (type == TYPE_F16);
 273 }
 274
 275 static inline int type_uint(type_t type)
 276 {
 277         return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
 278 }
 279
 280 static inline int type_sint(type_t type)
 281 {
 282         return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
 283 }
 284
 285 typedef union PACKED {   /* register operand; every member is a view onto the same storage (largest member is 32 bits) */
 286         /* normal gpr or const src register: */
 287         struct PACKED {
 288                 uint32_t comp  : 2;    /* component within register 'num' */
 289                 uint32_t num   : 10;   /* register number; REG_A0/REG_P0 are the special values tested by reg_special() */
 290         };
 291         /* for immediate val: */
 292         int32_t  iim_val   : 11;   /* signed 11-bit immediate */
 293         /* to make compiler happy (extra views of assorted widths and signedness): */
 294         uint32_t dummy32;
 295         uint32_t dummy10   : 10;
 296         int32_t  idummy10  : 10;
 297         uint32_t dummy11   : 11;
 298         uint32_t dummy12   : 12;
 299         uint32_t dummy13   : 13;
 300         uint32_t dummy8    : 8;
 301         int32_t  idummy13  : 13;
 302         int32_t  idummy8   : 8;
 303 } reg_t;
 304
 305 /* special registers (these values are compared against reg_t.num): */
 306 #define REG_A0 61       /* address register */
 307 #define REG_P0 62       /* predicate register */
 308
 309 static inline int reg_special(reg_t reg)
 310 {
 311         return (reg.num == REG_A0) || (reg.num == REG_P0);
 312 }
 313
 314 typedef struct PACKED {   /* category 0 instruction encoding: two 32-bit dwords */
 315         /* dword0: one of three immediate layouts, named a3xx/a4xx/a5xx: */
 316         union PACKED {
 317                 struct PACKED {
 318                         int16_t  immed    : 16;   /* signed 16-bit immediate */
 319                         uint32_t dummy1   : 16;
 320                 } a3xx;
 321                 struct PACKED {
 322                         int32_t  immed    : 20;   /* signed 20-bit immediate */
 323                         uint32_t dummy1   : 12;
 324                 } a4xx;
 325                 struct PACKED {
 326                         int32_t immed     : 32;   /* signed 32-bit immediate */
 327                 } a5xx;
 328         };
 329
 330         /* dword1 (the field widths below add up to 32 bits): */
 331         uint32_t dummy2   : 8;
 332         uint32_t repeat   : 3;
 333         uint32_t dummy3   : 1;
 334         uint32_t ss       : 1;
 335         uint32_t dummy4   : 7;
 336         uint32_t inv      : 1;
 337         uint32_t comp     : 2;
 338         uint32_t opc      : 4;   /* opcode within the category; 4 bits cover the cat0 opcodes 0..15 in opc_t */
 339         uint32_t jmp_tgt  : 1;
 340         uint32_t sync     : 1;
 341         uint32_t opc_cat  : 3;   /* instruction category; the last three fields are laid out the same in cat1..cat5 */
 342 } instr_cat0_t;
 343
 344 typedef struct PACKED {   /* category 1 instruction encoding: two 32-bit dwords */
 345         /* dword0: the source operand, in one of the forms below: */
 346         union PACKED {
 347                 /* for normal src register: */
 348                 struct PACKED {
 349                         uint32_t src : 11;
 350                         /* at least the low bit of pad must be zero, or it will
 351                          * look like an address-relative src (it overlays src_rel)
 352                          */
 353                         uint32_t pad : 21;
 354                 };
 355                 /* for address relative: */
 356                 struct PACKED {
 357                         int32_t  off : 10;   /* signed 10-bit offset */
 358                         uint32_t src_rel_c : 1;
 359                         uint32_t src_rel : 1;
 360                         uint32_t unknown : 20;
 361                 };
 362                 /* for immediate (the same 32 bits viewed as signed, unsigned or float): */
 363                 int32_t  iim_val;
 364                 uint32_t uim_val;
 365                 float    fim_val;
 366         };
 367
 368         /* dword1 (the field widths below add up to 32 bits): */
 369         uint32_t dst        : 8;
 370         uint32_t repeat     : 3;
 371         uint32_t src_r      : 1;
 372         uint32_t ss         : 1;
 373         uint32_t ul         : 1;
 374         uint32_t dst_type   : 3;   /* 3 bits, presumably a type_t value - TODO confirm */
 375         uint32_t dst_rel    : 1;
 376         uint32_t src_type   : 3;   /* 3 bits, presumably a type_t value - TODO confirm */
 377         uint32_t src_c      : 1;
 378         uint32_t src_im     : 1;
 379         uint32_t even       : 1;
 380         uint32_t pos_inf    : 1;
 381         uint32_t must_be_0  : 2;
 382         uint32_t jmp_tgt    : 1;
 383         uint32_t sync       : 1;
 384         uint32_t opc_cat    : 3;   /* instruction category */
 385 } instr_cat1_t;
 386
 387 typedef struct PACKED {   /* category 2 instruction encoding: two 32-bit dwords */
 388         /* dword0: two 16-bit unions, the first for src1 and the second for src2: */
 389         union PACKED {
 390                 struct PACKED {
 391                         uint32_t src1         : 11;
 392                         uint32_t must_be_zero1: 2;
 393                         uint32_t src1_im      : 1;   /* immediate */
 394                         uint32_t src1_neg     : 1;   /* negate */
 395                         uint32_t src1_abs     : 1;   /* absolute value */
 396                 };
 397                 struct PACKED {   /* relative-addressing view of the same 16 bits */
 398                         uint32_t src1         : 10;
 399                         uint32_t src1_c       : 1;   /* relative-const */
 400                         uint32_t src1_rel     : 1;   /* relative address */
 401                         uint32_t must_be_zero : 1;
 402                         uint32_t dummy        : 3;
 403                 } rel1;
 404                 struct PACKED {   /* const view of the same 16 bits */
 405                         uint32_t src1         : 12;
 406                         uint32_t src1_c       : 1;   /* const */
 407                         uint32_t dummy        : 3;
 408                 } c1;
 409         };
 410
 411         union PACKED {   /* src2: same three views as src1 above */
 412                 struct PACKED {
 413                         uint32_t src2         : 11;
 414                         uint32_t must_be_zero2: 2;
 415                         uint32_t src2_im      : 1;   /* immediate */
 416                         uint32_t src2_neg     : 1;   /* negate */
 417                         uint32_t src2_abs     : 1;   /* absolute value */
 418                 };
 419                 struct PACKED {
 420                         uint32_t src2         : 10;
 421                         uint32_t src2_c       : 1;   /* relative-const */
 422                         uint32_t src2_rel     : 1;   /* relative address */
 423                         uint32_t must_be_zero : 1;
 424                         uint32_t dummy        : 3;
 425                 } rel2;
 426                 struct PACKED {
 427                         uint32_t src2         : 12;
 428                         uint32_t src2_c       : 1;   /* const */
 429                         uint32_t dummy        : 3;
 430                 } c2;
 431         };
 432
 433         /* dword1 (the field widths below add up to 32 bits): */
 434         uint32_t dst      : 8;
 435         uint32_t repeat   : 2;
 436         uint32_t sat      : 1;
 437         uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
 438         uint32_t ss       : 1;
 439         uint32_t ul       : 1;   /* dunno */
 440         uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
 441         uint32_t ei       : 1;
 442         uint32_t cond     : 3;
 443         uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
 444         uint32_t full     : 1;   /* not half */
 445         uint32_t opc      : 6;   /* opcode within the category; cat2 uses the full NOPC_BITS width */
 446         uint32_t jmp_tgt  : 1;
 447         uint32_t sync     : 1;
 448         uint32_t opc_cat  : 3;   /* instruction category */
 449 } instr_cat2_t;
 450
 451 typedef struct PACKED {   /* category 3 (three-source) instruction encoding: two 32-bit dwords */
 452         /* dword0: two 16-bit unions, the first for src1 and the second for src3
 453          * (src2 itself lives in dword1); modifier bits of src2 are mixed in here: */
 454         union PACKED {
 455                 struct PACKED {
 456                         uint32_t src1         : 11;
 457                         uint32_t must_be_zero1: 2;
 458                         uint32_t src2_c       : 1;
 459                         uint32_t src1_neg     : 1;
 460                         uint32_t src2_r       : 1;  /* doubles as nop1 if repeat==0 */
 461                 };
 462                 struct PACKED {   /* relative-addressing view of the same 16 bits */
 463                         uint32_t src1         : 10;
 464                         uint32_t src1_c       : 1;
 465                         uint32_t src1_rel     : 1;
 466                         uint32_t must_be_zero : 1;
 467                         uint32_t dummy        : 3;
 468                 } rel1;
 469                 struct PACKED {   /* const view of the same 16 bits */
 470                         uint32_t src1         : 12;
 471                         uint32_t src1_c       : 1;
 472                         uint32_t dummy        : 3;
 473                 } c1;
 474         };
 475
 476         union PACKED {   /* src3, with the same three views */
 477                 struct PACKED {
 478                         uint32_t src3         : 11;
 479                         uint32_t must_be_zero2: 2;
 480                         uint32_t src3_r       : 1;
 481                         uint32_t src2_neg     : 1;
 482                         uint32_t src3_neg     : 1;
 483                 };
 484                 struct PACKED {
 485                         uint32_t src3         : 10;
 486                         uint32_t src3_c       : 1;
 487                         uint32_t src3_rel     : 1;
 488                         uint32_t must_be_zero : 1;
 489                         uint32_t dummy        : 3;
 490                 } rel2;
 491                 struct PACKED {
 492                         uint32_t src3         : 12;
 493                         uint32_t src3_c       : 1;
 494                         uint32_t dummy        : 3;
 495                 } c2;
 496         };
 497
 498         /* dword1 (the field widths below add up to 32 bits): */
 499         uint32_t dst      : 8;
 500         uint32_t repeat   : 2;
 501         uint32_t sat      : 1;
 502         uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
 503         uint32_t ss       : 1;
 504         uint32_t ul       : 1;
 505         uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
 506         uint32_t src2     : 8;   /* only 8 bits wide, unlike src1/src3 in dword0 */
 507         uint32_t opc      : 4;   /* opcode within the category (cat3 opcodes are 0..15); see instr_cat3_full() */
 508         uint32_t jmp_tgt  : 1;
 509         uint32_t sync     : 1;
 510         uint32_t opc_cat  : 3;   /* instruction category */
 511 } instr_cat3_t;
 511
 512 static inline bool instr_cat3_full(instr_cat3_t *cat3)
 513 {
 514         switch (_OPC(3, cat3->opc)) {
 515         case OPC_MAD_F16:
 516         case OPC_MAD_U16:
 517         case OPC_MAD_S16:
 518         case OPC_SEL_B16:
 519         case OPC_SEL_S16:
 520         case OPC_SEL_F16:
 521         case OPC_SAD_S16:
 522         case OPC_SAD_S32:  // really??
 523                 return false;
 524         default:
 525                 return true;
 526         }
 527 }
 528
 529 typedef struct PACKED {   /* category 4 (single-source) instruction encoding: two 32-bit dwords */
 530         /* dword0: a 16-bit source union followed by 16 ignored bits: */
 531         union PACKED {
 532                 struct PACKED {
 533                         uint32_t src          : 11;
 534                         uint32_t must_be_zero1: 2;
 535                         uint32_t src_im       : 1;   /* immediate */
 536                         uint32_t src_neg      : 1;   /* negate */
 537                         uint32_t src_abs      : 1;   /* absolute value */
 538                 };
 539                 struct PACKED {   /* relative-addressing view of the same 16 bits */
 540                         uint32_t src          : 10;
 541                         uint32_t src_c        : 1;   /* relative-const */
 542                         uint32_t src_rel      : 1;   /* relative address */
 543                         uint32_t must_be_zero : 1;
 544                         uint32_t dummy        : 3;
 545                 } rel;
 546                 struct PACKED {   /* const view of the same 16 bits */
 547                         uint32_t src          : 12;
 548                         uint32_t src_c        : 1;   /* const */
 549                         uint32_t dummy        : 3;
 550                 } c;
 551         };
 552         uint32_t dummy1   : 16;  /* seem to be ignored */
 553
 554         /* dword1 (the field widths below add up to 32 bits): */
 555         uint32_t dst      : 8;
 556         uint32_t repeat   : 2;
 557         uint32_t sat      : 1;
 558         uint32_t src_r    : 1;
 559         uint32_t ss       : 1;
 560         uint32_t ul       : 1;
 561         uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
 562         uint32_t dummy2   : 5;   /* seem to be ignored */
 563         uint32_t full     : 1;   /* not half */
 564         uint32_t opc      : 6;   /* opcode within the category */
 565         uint32_t jmp_tgt  : 1;
 566         uint32_t sync     : 1;
 567         uint32_t opc_cat  : 3;   /* instruction category */
 568 } instr_cat4_t;
 569
 570 /* With is_s2en_bindless = 1 (see instr_cat5_t), this determines whether
 571  * bindless is enabled and if so, how to get the (base, index) pair for both
 572  * sampler and texture. There is a single base embedded in the instruction,
 573  * which is always used for the texture. Values span 0..7 (3 bits).
 574  */
 575 typedef enum {
 576         /* Use traditional GL binding model, get texture and sampler index
 577          * from src3 which is not presumed to be uniform. This is
 578          * backwards-compatible with earlier generations, where this field was
 579          * always 0 and nonuniform-indexed sampling always worked.
 580          */
 581         CAT5_NONUNIFORM = 0,
 582
 583         /* The sampler base comes from the low 3 bits of a1.x, and the sampler
 584          * and texture index come from src3 which is presumed to be uniform.
 585          */
 586         CAT5_BINDLESS_A1_UNIFORM = 1,
 587
 588         /* The texture and sampler share the same base, and the sampler and
 589          * texture index come from src3 which is *not* presumed to be uniform.
 590          */
 591         CAT5_BINDLESS_NONUNIFORM = 2,
 592
 593         /* The sampler base comes from the low 3 bits of a1.x, and the sampler
 594          * and texture index come from src3 which is *not* presumed to be
 595          * uniform.
 596          */
 597         CAT5_BINDLESS_A1_NONUNIFORM = 3,
 598
 599         /* Use traditional GL binding model, get texture and sampler index
 600          * from src3 which is presumed to be uniform.
 601          */
 602         CAT5_UNIFORM = 4,
 603
 604         /* The texture and sampler share the same base, and the sampler and
 605          * texture index come from src3 which is presumed to be uniform.
 606          */
 607         CAT5_BINDLESS_UNIFORM = 5,
 608
 609         /* The texture and sampler share the same base, get sampler index from low
 610          * 4 bits of src3 and texture index from high 4 bits.
 611          */
 612         CAT5_BINDLESS_IMM = 6,
 613
 614         /* The sampler base comes from the low 3 bits of a1.x, and the texture
 615          * index comes from the next 8 bits of a1.x. The sampler index is an
 616          * immediate in src3.
 617          */
 618         CAT5_BINDLESS_A1_IMM = 7,
 619 } cat5_desc_mode_t;
 620
 621 typedef struct PACKED {   /* category 5 instruction encoding: two 32-bit dwords */
 622         /* dword0: three 32-bit views sharing the same leading full/src1/src2 fields: */
 623         union PACKED {
 624                 /* normal case: */
 625                 struct PACKED {
 626                         uint32_t full     : 1;   /* not half */
 627                         uint32_t src1     : 8;
 628                         uint32_t src2     : 8;
 629                         uint32_t dummy1   : 4;   /* seem to be ignored */
 630                         uint32_t samp     : 4;
 631                         uint32_t tex      : 7;
 632                 } norm;
 633                 /* s2en case (presumably the view used when is_s2en_bindless is set - confirm): */
 634                 struct PACKED {
 635                         uint32_t full         : 1;   /* not half */
 636                         uint32_t src1         : 8;
 637                         uint32_t src2         : 8;
 638                         uint32_t dummy1       : 2;
 639                         uint32_t base_hi      : 2;   /* presumably combines with base_lo in dword1 - confirm */
 640                         uint32_t src3         : 8;
 641                         uint32_t desc_mode    : 3;   /* 3 bits, matching the 0..7 range of cat5_desc_mode_t */
 642                 } s2en_bindless;
 643                 /* same in either case: */
 644                 // XXX I think, confirm this
 645                 struct PACKED {
 646                         uint32_t full     : 1;   /* not half */
 647                         uint32_t src1     : 8;
 648                         uint32_t src2     : 8;
 649                         uint32_t pad      : 15;
 650                 };
 651         };
 652
 653         /* dword1 (the field widths below add up to 32 bits): */
 654         uint32_t dst              : 8;
 655         uint32_t wrmask           : 4;   /* write-mask */
 656         uint32_t type             : 3;
 657         uint32_t base_lo          : 1;   /* used with bindless */
 658         uint32_t is_3d            : 1;
 659
 660         uint32_t is_a             : 1;
 661         uint32_t is_s             : 1;
 662         uint32_t is_s2en_bindless : 1;
 663         uint32_t is_o             : 1;
 664         uint32_t is_p             : 1;
 665
 666         uint32_t opc              : 5;   /* opcode within the category */
 667         uint32_t jmp_tgt          : 1;
 668         uint32_t sync             : 1;
 669         uint32_t opc_cat          : 3;   /* instruction category */
 670 } instr_cat5_t;
 671
 672 /* dword0 encoding for src_off: [src1 + off], src2 (bit 0 is fixed to 1): */
 673 typedef struct PACKED {
 674         /* dword0 (the field widths below add up to 32 bits): */
 675         uint32_t mustbe1  : 1;   /* overlays instr_cat6_t.src_off */
 676         int32_t  off      : 13;  /* signed 13-bit offset */
 677         uint32_t src1     : 8;
 678         uint32_t src1_im  : 1;
 679         uint32_t src2_im  : 1;
 680         uint32_t src2     : 8;
 681
 682         /* dword1: opaque in this view */
 683         uint32_t dword1;
 684 } instr_cat6a_t;
 685
 686 /* dword0 encoding for !src_off: [src1], src2 (bit 0 is fixed to 0) */
 687 typedef struct PACKED {
 688         /* dword0 (the field widths below add up to 32 bits): */
 689         uint32_t mustbe0  : 1;   /* overlays instr_cat6_t.src_off */
 690         uint32_t src1     : 13;
 691         uint32_t ignore0  : 8;
 692         uint32_t src1_im  : 1;
 693         uint32_t src2_im  : 1;
 694         uint32_t src2     : 8;
 695
 696         /* dword1: opaque in this view */
 697         uint32_t dword1;
 698 } instr_cat6b_t;
 699
 700 /* dword1 encoding for dst_off (the bit after the first 8 is fixed to 1): */
 701 typedef struct PACKED {
 702         /* dword0: opaque in this view */
 703         uint32_t dword0;
 704
 705         /* note: there is some weird stuff going on where sometimes
 706          * cat6->a.off is involved.. but that seems like a bug in
 707          * the blob, since it is used even if !cat6->src_off
 708          * It would make sense for there to be some more bits to
 709          * bring us to 11 bits worth of offset, but not sure..
 710          */
 711         int32_t off       : 8;   /* signed 8-bit offset */
 712         uint32_t mustbe1  : 1;   /* overlays instr_cat6_t.dst_off */
 713         uint32_t dst      : 8;
 714         uint32_t pad1     : 15;  /* overlays type/g/opc/jmp_tgt/sync/opc_cat of instr_cat6_t */
 715 } instr_cat6c_t;
 716
 717 /* dword1 encoding for !dst_off (the bit after the first 8 is fixed to 0): */
 718 typedef struct PACKED {
 719         /* dword0: opaque in this view */
 720         uint32_t dword0;
 721
 722         uint32_t dst      : 8;
 723         uint32_t mustbe0  : 1;   /* overlays instr_cat6_t.dst_off */
 724         uint32_t idx      : 8;
 725         uint32_t pad0     : 15;  /* overlays type/g/opc/jmp_tgt/sync/opc_cat of instr_cat6_t */
 726 } instr_cat6d_t;
 727
 728 /* ldgb and atomics: full two-dword view used for these opcodes.
 729  *
 730  * ldgb:      pad0=0, pad3=1
 731  * atomic .g: pad0=1, pad3=1
 732  *        .l: pad0=1, pad3=0
 733  */
 734 typedef struct PACKED {
 735         /* dword0 (the field widths below add up to 32 bits): */
 736         uint32_t pad0     : 1;   /* see the table above */
 737         uint32_t src3     : 8;
 738         uint32_t d        : 2;
 739         uint32_t typed    : 1;
 740         uint32_t type_size : 2;
 741         uint32_t src1     : 8;
 742         uint32_t src1_im  : 1;
 743         uint32_t src2_im  : 1;
 744         uint32_t src2     : 8;
 745
 746         /* dword1 (the field widths below add up to 32 bits): */
 747         uint32_t dst      : 8;
 748         uint32_t mustbe0  : 1;
 749         uint32_t src_ssbo : 8;
 750         uint32_t pad2     : 3;  // type
 751         uint32_t g        : 1;
 752         uint32_t pad3     : 1;   /* see the table above */
 753         uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
 754 } instr_cat6ldgb_t;
 755
 756 /* stgb: full two-dword view; pad0=0, pad3=2
 757  */
 758 typedef struct PACKED {
 759         /* dword0 (the field widths below add up to 32 bits): */
 760         uint32_t mustbe1  : 1;  // ???
 761         uint32_t src1     : 8;
 762         uint32_t d        : 2;
 763         uint32_t typed    : 1;
 764         uint32_t type_size : 2;
 765         uint32_t pad0     : 9;
 766         uint32_t src2_im  : 1;
 767         uint32_t src2     : 8;
 768
 769         /* dword1 (the field widths below add up to 32 bits): */
 770         uint32_t src3     : 8;
 771         uint32_t src3_im  : 1;
 772         uint32_t dst_ssbo : 8;
 773         uint32_t pad2     : 3;  // type
 774         uint32_t pad3     : 2;   /* 2 bits here: spans the g and pad4 bits of instr_cat6_t */
 775         uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
 776 } instr_cat6stgb_t;
 777
 778 typedef union PACKED {   /* category 6 instruction: all members overlay the same two dwords */
 779         instr_cat6a_t a;   /* dword0 view for src_off */
 780         instr_cat6b_t b;   /* dword0 view for !src_off */
 781         instr_cat6c_t c;   /* dword1 view for dst_off */
 782         instr_cat6d_t d;   /* dword1 view for !dst_off */
 783         instr_cat6ldgb_t ldgb;   /* ldgb and atomics */
 784         instr_cat6stgb_t stgb;   /* stgb */
 785         struct PACKED {   /* common fields, including the selector bits for the views above */
 786                 /* dword0: */
 787                 uint32_t src_off  : 1;   /* same bit as a.mustbe1 / b.mustbe0 */
 788                 uint32_t pad1     : 31;
 789
 790                 /* dword1 (the field widths below add up to 32 bits): */
 791                 uint32_t pad2     : 8;
 792                 uint32_t dst_off  : 1;   /* same bit as c.mustbe1 / d.mustbe0 */
 793                 uint32_t pad3     : 8;
 794                 uint32_t type     : 3;
 795                 uint32_t g        : 1;  /* or in some cases it means dst immed */
 796                 uint32_t pad4     : 1;
 797                 uint32_t opc      : 5;   /* opcode within the category (cat6 opcodes are 0..31) */
 798                 uint32_t jmp_tgt  : 1;
 799                 uint32_t sync     : 1;
 800                 uint32_t opc_cat  : 3;   /* instruction category */
 801         };
 802 } instr_cat6_t;
 803
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 *
 * Only the values 0, 4, 5 and 6 are named here; the remaining values of a
 * 3-bit field (1-3 and 7) have no enumerator in this enum.
 * NOTE(review): presumably this is what instr_cat6_a6xx_t::desc_mode
 * holds -- confirm against the encoder/decoder.
 */
typedef enum {
        /* Use old GL binding model with an immediate index.
         * TODO: find CAT6_UNIFORM and CAT6_NONUNIFORM
         */
        CAT6_IMM = 0,

        /* Use the bindless model, with an immediate index.
         */
        CAT6_BINDLESS_IMM = 4,

        /* Use the bindless model, with a uniform register index.
         */
        CAT6_BINDLESS_UNIFORM = 5,

        /* Use the bindless model, with a register index that isn't guaranteed
         * to be uniform. This presumably checks if the indices are equal and
         * splits up the load/store, because it works the way you would
         * expect.
         */
        CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
 827
/**
 * Bitfield layout of the newer (a6xx) cat6 encoding: two dwords, with the
 * field widths of each dword adding up to 32 bits.
 *
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad3=c, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad3=8, pad5=2
 *
 * NOTE(review): this comment used to end with "pad2 and pad5 are only
 * observed to be 0", which contradicts the pad5=2/3 values listed above
 * (and is_cat6_legacy() relies on a pad5 bit being set).  Presumably pad2
 * and pad4 were meant -- confirm against traces.
 */
typedef struct PACKED {
        /* dword0: */
        uint32_t pad1     : 1;
        uint32_t base     : 3;
        uint32_t pad2     : 2;
        uint32_t desc_mode : 3; /* presumably a cat6_desc_mode_t -- TODO confirm */
        uint32_t d        : 2;
        uint32_t typed    : 1;
        uint32_t type_size : 2;
        uint32_t opc      : 5;
        uint32_t pad3     : 5;  /* bit 0x8 is tested by is_cat6_legacy() */
        uint32_t src1     : 8;  /* coordinate/offset */

        /* dword1: */
        uint32_t src2     : 8;  /* or the dst for load instructions */
        uint32_t pad4     : 1;  //mustbe0 ??
        uint32_t ssbo     : 8;  /* ssbo/image binding point */
        uint32_t type     : 3;
        uint32_t pad5     : 7;  /* bit 0x2 is tested by is_cat6_legacy() */
        uint32_t jmp_tgt  : 1;
        uint32_t sync     : 1;
        uint32_t opc_cat  : 3;
} instr_cat6_a6xx_t;
 874
/* Category 7 instruction layout.  In this view dword0 is entirely padding;
 * dword1 carries the (ss) bit, the w/r/l/g flags, a 4-bit opc and the usual
 * jmp_tgt/sync/opc_cat fields.  The dword1 field widths add up to 32 bits.
 */
typedef struct PACKED {
        /* dword0: */
        uint32_t pad1     : 32;

        /* dword1: */
        uint32_t pad2     : 12;
        uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
        uint32_t pad3     : 6;
        uint32_t w        : 1;  /* write */
        uint32_t r        : 1;  /* read */
        uint32_t l        : 1;  /* local */
        uint32_t g        : 1;  /* global */
        uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
        uint32_t jmp_tgt  : 1;  /* (jp) */
        uint32_t sync     : 1;  /* (sy) */
        uint32_t opc_cat  : 3;
} instr_cat7_t;
 892
/* One instruction (two dwords), viewed either through a per-category layout
 * (cat0..cat7, plus cat6_a6xx for the newer cat6 encoding) or through the
 * anonymous struct, which names only the fields accessed directly on the
 * union.  The helpers below switch on opc_cat to pick the per-category
 * member to read.
 */
typedef union PACKED {
        instr_cat0_t cat0;
        instr_cat1_t cat1;
        instr_cat2_t cat2;
        instr_cat3_t cat3;
        instr_cat4_t cat4;
        instr_cat5_t cat5;
        instr_cat6_t cat6;
        instr_cat6_a6xx_t cat6_a6xx;
        instr_cat7_t cat7;
        struct PACKED {
                /* dword0: */
                uint32_t pad1     : 32;

                /* dword1: (field widths add up to 32 bits) */
                uint32_t pad2     : 12;
                uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
                uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
                uint32_t pad3     : 13;
                uint32_t jmp_tgt  : 1;
                uint32_t sync     : 1;
                uint32_t opc_cat  : 3;  /* instruction category, 0..7 */

        };
} instr_t;
 918
 919 static inline uint32_t instr_repeat(instr_t *instr)
 920 {
 921         switch (instr->opc_cat) {
 922         case 0:  return instr->cat0.repeat;
 923         case 1:  return instr->cat1.repeat;
 924         case 2:  return instr->cat2.repeat;
 925         case 3:  return instr->cat3.repeat;
 926         case 4:  return instr->cat4.repeat;
 927         default: return 0;
 928         }
 929 }
 930
 931 static inline bool instr_sat(instr_t *instr)
 932 {
 933         switch (instr->opc_cat) {
 934         case 2:  return instr->cat2.sat;
 935         case 3:  return instr->cat3.sat;
 936         case 4:  return instr->cat4.sat;
 937         default: return false;
 938         }
 939 }
 940
 941 /* We can probably drop the gpu_id arg, but keeping it for now so we can
 942  * assert if we see something we think should be new encoding on an older
 943  * gpu.
 944  */
 945 static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
 946 {
 947         instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
 948
 949         /* At least one of these two bits is pad in all the possible
 950          * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
 951          * cmdstream traces I have indicates that the pad bit is zero
 952          * in all cases.  So we can use this to detect new encoding:
 953          */
 954         if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
 955                 assert(gpu_id >= 600);
 956                 assert(instr->cat6.opc == 0);
 957                 return false;
 958         }
 959
 960         return true;
 961 }
 962
 963 static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
 964 {
 965         switch (instr->opc_cat) {
 966         case 0:  return instr->cat0.opc;
 967         case 1:  return 0;
 968         case 2:  return instr->cat2.opc;
 969         case 3:  return instr->cat3.opc;
 970         case 4:  return instr->cat4.opc;
 971         case 5:  return instr->cat5.opc;
 972         case 6:
 973                 if (!is_cat6_legacy(instr, gpu_id))
 974                         return instr->cat6_a6xx.opc;
 975                 return instr->cat6.opc;
 976         case 7:  return instr->cat7.opc;
 977         default: return 0;
 978         }
 979 }
 980
 981 static inline bool is_mad(opc_t opc)
 982 {
 983         switch (opc) {
 984         case OPC_MAD_U16:
 985         case OPC_MAD_S16:
 986         case OPC_MAD_U24:
 987         case OPC_MAD_S24:
 988         case OPC_MAD_F16:
 989         case OPC_MAD_F32:
 990                 return true;
 991         default:
 992                 return false;
 993         }
 994 }
 995
 996 static inline bool is_madsh(opc_t opc)
 997 {
 998         switch (opc) {
 999         case OPC_MADSH_U16:
1000         case OPC_MADSH_M16:
1001                 return true;
1002         default:
1003                 return false;
1004         }
1005 }
1006
1007 static inline bool is_atomic(opc_t opc)
1008 {
1009         switch (opc) {
1010         case OPC_ATOMIC_ADD:
1011         case OPC_ATOMIC_SUB:
1012         case OPC_ATOMIC_XCHG:
1013         case OPC_ATOMIC_INC:
1014         case OPC_ATOMIC_DEC:
1015         case OPC_ATOMIC_CMPXCHG:
1016         case OPC_ATOMIC_MIN:
1017         case OPC_ATOMIC_MAX:
1018         case OPC_ATOMIC_AND:
1019         case OPC_ATOMIC_OR:
1020         case OPC_ATOMIC_XOR:
1021                 return true;
1022         default:
1023                 return false;
1024         }
1025 }
1026
1027 static inline bool is_ssbo(opc_t opc)
1028 {
1029         switch (opc) {
1030         case OPC_RESFMT:
1031         case OPC_RESINFO:
1032         case OPC_LDGB:
1033         case OPC_STGB:
1034         case OPC_STIB:
1035                 return true;
1036         default:
1037                 return false;
1038         }
1039 }
1040
1041 static inline bool is_isam(opc_t opc)
1042 {
1043         switch (opc) {
1044         case OPC_ISAM:
1045         case OPC_ISAML:
1046         case OPC_ISAMM:
1047                 return true;
1048         default:
1049                 return false;
1050         }
1051 }
1052
1053
1054 static inline bool is_cat2_float(opc_t opc)
1055 {
1056         switch (opc) {
1057         case OPC_ADD_F:
1058         case OPC_MIN_F:
1059         case OPC_MAX_F:
1060         case OPC_MUL_F:
1061         case OPC_SIGN_F:
1062         case OPC_CMPS_F:
1063         case OPC_ABSNEG_F:
1064         case OPC_CMPV_F:
1065         case OPC_FLOOR_F:
1066         case OPC_CEIL_F:
1067         case OPC_RNDNE_F:
1068         case OPC_RNDAZ_F:
1069         case OPC_TRUNC_F:
1070                 return true;
1071
1072         default:
1073                 return false;
1074         }
1075 }
1076
1077 static inline bool is_cat3_float(opc_t opc)
1078 {
1079         switch (opc) {
1080         case OPC_MAD_F16:
1081         case OPC_MAD_F32:
1082         case OPC_SEL_F16:
1083         case OPC_SEL_F32:
1084                 return true;
1085         default:
1086                 return false;
1087         }
1088 }
1089
/* Disassembler entry point (declaration only; the definition is not in this
 * header).
 * NOTE(review): presumably `dwords`/`sizedwords` describe the instruction
 * buffer and its length in dwords, `level` is an indentation or verbosity
 * level, `out` is the stream the listing is printed to, and `gpu_id` is the
 * GPU generation as used by is_cat6_legacy()/instr_opc() -- confirm these
 * and the meaning of the return value against the implementation.
 */
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
1091
1092 #endif /* INSTR_A3XX_H_ */