freedreno/ir3: correct # of dest components for intrinsics
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3.h
index 1391cbd97dafd1fcc3d457461fbe1b4d4fa0c2c9..de7a2a887333c4e781501e3154ebb7d220516fa5 100644 (file)
@@ -58,8 +58,14 @@ struct ir3_register {
                IR3_REG_CONST  = 0x001,
                IR3_REG_IMMED  = 0x002,
                IR3_REG_HALF   = 0x004,
-               IR3_REG_RELATIV= 0x008,
-               IR3_REG_R      = 0x010,
+               /* High registers are used for some things in compute shaders,
+                * for example.  They seem to hold values that are the same for
+                * every thread in a wave, so possibly the registers themselves
+                * are global/shared across all the threads in the wave?
+                */
+               IR3_REG_HIGH   = 0x008,
+               IR3_REG_RELATIV= 0x010,
+               IR3_REG_R      = 0x020,
                /* Most instructions, it seems, can do float abs/neg but not
                 * integer.  The CP pass needs to know what is intended (int or
                 * float) in order to do the right thing.  For this reason the
@@ -68,23 +74,23 @@ struct ir3_register {
                 * bitwise not, so split that out into a new flag to make it
                 * more clear.
                 */
-               IR3_REG_FNEG   = 0x020,
-               IR3_REG_FABS   = 0x040,
-               IR3_REG_SNEG   = 0x080,
-               IR3_REG_SABS   = 0x100,
-               IR3_REG_BNOT   = 0x200,
-               IR3_REG_EVEN   = 0x400,
-               IR3_REG_POS_INF= 0x800,
+               IR3_REG_FNEG   = 0x040,
+               IR3_REG_FABS   = 0x080,
+               IR3_REG_SNEG   = 0x100,
+               IR3_REG_SABS   = 0x200,
+               IR3_REG_BNOT   = 0x400,
+               IR3_REG_EVEN   = 0x800,
+               IR3_REG_POS_INF= 0x1000,
                /* (ei) flag, end-input?  Set on last bary, presumably to signal
                 * that the shader needs no more input:
                 */
-               IR3_REG_EI     = 0x1000,
+               IR3_REG_EI     = 0x2000,
                /* meta-flags, for intermediate stages of IR, ie.
                 * before register assignment is done:
                 */
-               IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
-               IR3_REG_ARRAY  = 0x4000,
-               IR3_REG_PHI_SRC= 0x8000,   /* phi src, regs[0]->instr points to phi */
+               IR3_REG_SSA    = 0x4000,   /* 'instr' is ptr to assigning instr */
+               IR3_REG_ARRAY  = 0x8000,
+               IR3_REG_PHI_SRC= 0x10000,  /* phi src, regs[0]->instr points to phi */
 
        } flags;
        union {
@@ -220,7 +226,7 @@ struct ir3_instruction {
                        type_t type;
                        int src_offset;
                        int dst_offset;
-                       int iim_val;
+                       int iim_val;          /* for ldgb/stgb, # of components */
                } cat6;
                /* for meta-instructions, just used to hold extra data
                 * before instruction scheduling, etc
@@ -308,8 +314,14 @@ struct ir3_instruction {
 static inline struct ir3_instruction *
 ir3_neighbor_first(struct ir3_instruction *instr)
 {
-       while (instr->cp.left)
+       int cnt = 0;
+       while (instr->cp.left) {
                instr = instr->cp.left;
+               if (++cnt > 0xffff) {
+                       debug_assert(0);
+                       break;
+               }
+       }
        return instr;
 }
 
@@ -322,12 +334,29 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr)
        while (instr->cp.right) {
                num++;
                instr = instr->cp.right;
+               if (num > 0xffff) {
+                       debug_assert(0);
+                       break;
+               }
        }
 
        return num;
 }
 
-struct ir3_heap_chunk;
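+/* Simple growable array: DECLARE_ARRAY() declares <name>, <name>_count and
+ * <name>_sz; array_insert() appends, doubling capacity (min 16) when full.
+ */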
+#define DECLARE_ARRAY(type, name) \
+       unsigned name ## _count, name ## _sz; \
+       type * name;
+
+#define array_insert(ctx, arr, val) do { \
+               if (arr ## _count == arr ## _sz) { \
+                       arr ## _sz = MAX2(2 * arr ## _sz, 16); \
+                       arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \
+               } \
+               arr[arr ##_count++] = val; \
+       } while (0)
 
 struct ir3 {
        struct ir3_compiler *compiler;
@@ -342,8 +371,7 @@ struct ir3 {
         * threads in a group are killed before the last bary.f gets
         * a chance to signal end of input (ei).
         */
-       unsigned baryfs_count, baryfs_sz;
-       struct ir3_instruction **baryfs;
+       DECLARE_ARRAY(struct ir3_instruction *, baryfs);
 
        /* Track all indirect instructions (read and write).  To avoid
         * deadlock scenario where an address register gets scheduled,
@@ -355,36 +383,31 @@ struct ir3 {
         * convenient list of instructions that reference some address
         * register simplifies this.
         */
-       unsigned indirects_count, indirects_sz;
-       struct ir3_instruction **indirects;
+       DECLARE_ARRAY(struct ir3_instruction *, indirects);
+
        /* and same for instructions that consume predicate register: */
-       unsigned predicates_count, predicates_sz;
-       struct ir3_instruction **predicates;
+       DECLARE_ARRAY(struct ir3_instruction *, predicates);
 
-       /* Track instructions which do not write a register but other-
-        * wise must not be discarded (such as kill, stg, etc)
+       /* Track texture sample instructions which need texture state
+        * patched in (for astc-srgb workaround):
         */
-       unsigned keeps_count, keeps_sz;
-       struct ir3_instruction **keeps;
+       DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);
 
        /* List of blocks: */
        struct list_head block_list;
 
        /* List of ir3_array's: */
        struct list_head array_list;
-
-       unsigned heap_idx;
-       struct ir3_heap_chunk *chunk;
 };
 
-typedef struct nir_variable nir_variable;
+typedef struct nir_register nir_register;
 
 struct ir3_array {
        struct list_head node;
        unsigned length;
        unsigned id;
 
-       nir_variable *var;
+       nir_register *r;
 
        /* We track the last write and last access (read or write) to
         * setup dependencies on instructions that read or write the
@@ -424,6 +447,11 @@ struct ir3_block {
 
        uint16_t start_ip, end_ip;
 
+       /* Track instructions which do not write a register but other-
+        * wise must not be discarded (such as kill, stg, etc)
+        */
+       DECLARE_ARRAY(struct ir3_instruction *, keeps);
+
        /* used for per-pass extra block data.  Mainly used right
         * now in RA step to track livein/liveout.
         */
@@ -434,6 +462,16 @@ struct ir3_block {
 #endif
 };
 
+static inline uint32_t   /* 32-bit id: serialno (DEBUG) or truncated address */
+block_id(struct ir3_block *block)
+{
+#ifdef DEBUG
+       return block->serialno;
+#else
+       return (uint32_t)(uintptr_t)block;   /* uintptr_t: pointer-sized, unlike unsigned long on LLP64 */
+#endif
+}
+
 struct ir3 * ir3_create(struct ir3_compiler *compiler,
                unsigned nin, unsigned nout);
 void ir3_destroy(struct ir3 *shader);
@@ -443,10 +481,9 @@ void * ir3_alloc(struct ir3 *shader, int sz);
 
 struct ir3_block * ir3_block_create(struct ir3 *shader);
 
-struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
-               int category, opc_t opc);
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc);
 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
-               int category, opc_t opc, int nreg);
+               opc_t opc, int nreg);
 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
 const char *ir3_instr_name(struct ir3_instruction *instr);
 
@@ -576,6 +613,7 @@ is_store(struct ir3_instruction *instr)
         */
        switch (instr->opc) {
        case OPC_STG:
+       case OPC_STGB:
        case OPC_STP:
        case OPC_STL:
        case OPC_STLW:
@@ -591,11 +629,12 @@ static inline bool is_load(struct ir3_instruction *instr)
 {
        switch (instr->opc) {
        case OPC_LDG:
+       case OPC_LDGB:
        case OPC_LDL:
        case OPC_LDP:
        case OPC_L2G:
        case OPC_LDLW:
-       case OPC_LDC_4:
+       case OPC_LDC:
        case OPC_LDLV:
                /* probably some others too.. */
                return true;
@@ -619,6 +658,18 @@ static inline bool is_input(struct ir3_instruction *instr)
        }
 }
 
+static inline bool is_bool(struct ir3_instruction *instr)
+{
+       switch (instr->opc) {   /* true only for CMPS_{F,S,U}; presumably "result is a boolean" -- confirm */
+       case OPC_CMPS_F:
+       case OPC_CMPS_S:
+       case OPC_CMPS_U:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static inline bool is_meta(struct ir3_instruction *instr)
 {
        /* TODO how should we count PHI (and maybe fan-in/out) which
@@ -822,14 +873,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
        }
 }
 
-#define array_insert(arr, val) do { \
-               if (arr ## _count == arr ## _sz) { \
-                       arr ## _sz = MAX2(2 * arr ## _sz, 16); \
-                       arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
-               } \
-               arr[arr ##_count++] = val; \
-       } while (0)
-
 /* iterator for an instructions's sources (reg), also returns src #: */
 #define foreach_src_n(__srcreg, __n, __instr) \
        if ((__instr)->regs_count) \
@@ -878,7 +921,8 @@ void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
 void ir3_depth(struct ir3 *ir);
 
 /* copy-propagate: */
-void ir3_cp(struct ir3 *ir);
+struct ir3_shader_variant;
+void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* group neighbors and insert mov's to resolve conflicts: */
 void ir3_group(struct ir3 *ir);
@@ -892,7 +936,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type,
                bool frag_coord, bool frag_face);
 
 /* legalize: */
-void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary);
 
 /* ************************************************************************* */
 /* instruction helpers */
@@ -900,8 +944,7 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
 static inline struct ir3_instruction *
 ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
 {
-       struct ir3_instruction *instr =
-               ir3_instr_create(block, 1, OPC_MOV);
+       struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
        ir3_reg_create(instr, 0, 0);   /* dst */
        if (src->regs[0]->flags & IR3_REG_ARRAY) {
                struct ir3_register *src_reg =
@@ -921,8 +964,7 @@ static inline struct ir3_instruction *
 ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
                type_t src_type, type_t dst_type)
 {
-       struct ir3_instruction *instr =
-               ir3_instr_create(block, 1, OPC_MOV);
+       struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
        ir3_reg_create(instr, 0, 0);   /* dst */
        ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
        instr->cat1.src_type = src_type;
@@ -934,45 +976,45 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
 static inline struct ir3_instruction *
 ir3_NOP(struct ir3_block *block)
 {
-       return ir3_instr_create(block, 0, OPC_NOP);
+       return ir3_instr_create(block, OPC_NOP);
 }
 
-#define INSTR0(CAT, name)                                                \
+#define INSTR0(name)                                                     \
 static inline struct ir3_instruction *                                   \
 ir3_##name(struct ir3_block *block)                                      \
 {                                                                        \
        struct ir3_instruction *instr =                                      \
-               ir3_instr_create(block, CAT, OPC_##name);                        \
+               ir3_instr_create(block, OPC_##name);                             \
        return instr;                                                        \
 }
 
-#define INSTR1(CAT, name)                                                \
+#define INSTR1(name)                                                     \
 static inline struct ir3_instruction *                                   \
 ir3_##name(struct ir3_block *block,                                      \
                struct ir3_instruction *a, unsigned aflags)                      \
 {                                                                        \
        struct ir3_instruction *instr =                                      \
-               ir3_instr_create(block, CAT, OPC_##name);                        \
+               ir3_instr_create(block, OPC_##name);                             \
        ir3_reg_create(instr, 0, 0);   /* dst */                             \
        ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
        return instr;                                                        \
 }
 
-#define INSTR2(CAT, name)                                                \
+#define INSTR2(name)                                                     \
 static inline struct ir3_instruction *                                   \
 ir3_##name(struct ir3_block *block,                                      \
                struct ir3_instruction *a, unsigned aflags,                      \
                struct ir3_instruction *b, unsigned bflags)                      \
 {                                                                        \
        struct ir3_instruction *instr =                                      \
-               ir3_instr_create(block, CAT, OPC_##name);                        \
+               ir3_instr_create(block, OPC_##name);                             \
        ir3_reg_create(instr, 0, 0);   /* dst */                             \
        ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
        ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
        return instr;                                                        \
 }
 
-#define INSTR3(CAT, name)                                                \
+#define INSTR3(name)                                                     \
 static inline struct ir3_instruction *                                   \
 ir3_##name(struct ir3_block *block,                                      \
                struct ir3_instruction *a, unsigned aflags,                      \
@@ -980,98 +1022,116 @@ ir3_##name(struct ir3_block *block,                                      \
                struct ir3_instruction *c, unsigned cflags)                      \
 {                                                                        \
        struct ir3_instruction *instr =                                      \
-               ir3_instr_create(block, CAT, OPC_##name);                        \
+               ir3_instr_create(block, OPC_##name);                             \
+       ir3_reg_create(instr, 0, 0);   /* dst */                             \
+       ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
+       ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
+       ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c;           \
+       return instr;                                                        \
+}
+
+#define INSTR4(name)                                                     \
+static inline struct ir3_instruction *                                   \
+ir3_##name(struct ir3_block *block,                                      \
+               struct ir3_instruction *a, unsigned aflags,                      \
+               struct ir3_instruction *b, unsigned bflags,                      \
+               struct ir3_instruction *c, unsigned cflags,                      \
+               struct ir3_instruction *d, unsigned dflags)                      \
+{                                                                        \
+       struct ir3_instruction *instr =                                      \
+               ir3_instr_create2(block, OPC_##name, 5);                         \
        ir3_reg_create(instr, 0, 0);   /* dst */                             \
        ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
        ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
        ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c;           \
+       ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d;           \
        return instr;                                                        \
 }
 
 /* cat0 instructions: */
-INSTR0(0, BR);
-INSTR0(0, JUMP);
-INSTR1(0, KILL);
-INSTR0(0, END);
+INSTR0(BR);
+INSTR0(JUMP);
+INSTR1(KILL);
+INSTR0(END);
 
 /* cat2 instructions, most 2 src but some 1 src: */
-INSTR2(2, ADD_F)
-INSTR2(2, MIN_F)
-INSTR2(2, MAX_F)
-INSTR2(2, MUL_F)
-INSTR1(2, SIGN_F)
-INSTR2(2, CMPS_F)
-INSTR1(2, ABSNEG_F)
-INSTR2(2, CMPV_F)
-INSTR1(2, FLOOR_F)
-INSTR1(2, CEIL_F)
-INSTR1(2, RNDNE_F)
-INSTR1(2, RNDAZ_F)
-INSTR1(2, TRUNC_F)
-INSTR2(2, ADD_U)
-INSTR2(2, ADD_S)
-INSTR2(2, SUB_U)
-INSTR2(2, SUB_S)
-INSTR2(2, CMPS_U)
-INSTR2(2, CMPS_S)
-INSTR2(2, MIN_U)
-INSTR2(2, MIN_S)
-INSTR2(2, MAX_U)
-INSTR2(2, MAX_S)
-INSTR1(2, ABSNEG_S)
-INSTR2(2, AND_B)
-INSTR2(2, OR_B)
-INSTR1(2, NOT_B)
-INSTR2(2, XOR_B)
-INSTR2(2, CMPV_U)
-INSTR2(2, CMPV_S)
-INSTR2(2, MUL_U)
-INSTR2(2, MUL_S)
-INSTR2(2, MULL_U)
-INSTR1(2, BFREV_B)
-INSTR1(2, CLZ_S)
-INSTR1(2, CLZ_B)
-INSTR2(2, SHL_B)
-INSTR2(2, SHR_B)
-INSTR2(2, ASHR_B)
-INSTR2(2, BARY_F)
-INSTR2(2, MGEN_B)
-INSTR2(2, GETBIT_B)
-INSTR1(2, SETRM)
-INSTR1(2, CBITS_B)
-INSTR2(2, SHB)
-INSTR2(2, MSAD)
+INSTR2(ADD_F)
+INSTR2(MIN_F)
+INSTR2(MAX_F)
+INSTR2(MUL_F)
+INSTR1(SIGN_F)
+INSTR2(CMPS_F)
+INSTR1(ABSNEG_F)
+INSTR2(CMPV_F)
+INSTR1(FLOOR_F)
+INSTR1(CEIL_F)
+INSTR1(RNDNE_F)
+INSTR1(RNDAZ_F)
+INSTR1(TRUNC_F)
+INSTR2(ADD_U)
+INSTR2(ADD_S)
+INSTR2(SUB_U)
+INSTR2(SUB_S)
+INSTR2(CMPS_U)
+INSTR2(CMPS_S)
+INSTR2(MIN_U)
+INSTR2(MIN_S)
+INSTR2(MAX_U)
+INSTR2(MAX_S)
+INSTR1(ABSNEG_S)
+INSTR2(AND_B)
+INSTR2(OR_B)
+INSTR1(NOT_B)
+INSTR2(XOR_B)
+INSTR2(CMPV_U)
+INSTR2(CMPV_S)
+INSTR2(MUL_U)
+INSTR2(MUL_S)
+INSTR2(MULL_U)
+INSTR1(BFREV_B)
+INSTR1(CLZ_S)
+INSTR1(CLZ_B)
+INSTR2(SHL_B)
+INSTR2(SHR_B)
+INSTR2(ASHR_B)
+INSTR2(BARY_F)
+INSTR2(MGEN_B)
+INSTR2(GETBIT_B)
+INSTR1(SETRM)
+INSTR1(CBITS_B)
+INSTR2(SHB)
+INSTR2(MSAD)
 
 /* cat3 instructions: */
-INSTR3(3, MAD_U16)
-INSTR3(3, MADSH_U16)
-INSTR3(3, MAD_S16)
-INSTR3(3, MADSH_M16)
-INSTR3(3, MAD_U24)
-INSTR3(3, MAD_S24)
-INSTR3(3, MAD_F16)
-INSTR3(3, MAD_F32)
-INSTR3(3, SEL_B16)
-INSTR3(3, SEL_B32)
-INSTR3(3, SEL_S16)
-INSTR3(3, SEL_S32)
-INSTR3(3, SEL_F16)
-INSTR3(3, SEL_F32)
-INSTR3(3, SAD_S16)
-INSTR3(3, SAD_S32)
+INSTR3(MAD_U16)
+INSTR3(MADSH_U16)
+INSTR3(MAD_S16)
+INSTR3(MADSH_M16)
+INSTR3(MAD_U24)
+INSTR3(MAD_S24)
+INSTR3(MAD_F16)
+INSTR3(MAD_F32)
+INSTR3(SEL_B16)
+INSTR3(SEL_B32)
+INSTR3(SEL_S16)
+INSTR3(SEL_S32)
+INSTR3(SEL_F16)
+INSTR3(SEL_F32)
+INSTR3(SAD_S16)
+INSTR3(SAD_S32)
 
 /* cat4 instructions: */
-INSTR1(4, RCP)
-INSTR1(4, RSQ)
-INSTR1(4, LOG2)
-INSTR1(4, EXP2)
-INSTR1(4, SIN)
-INSTR1(4, COS)
-INSTR1(4, SQRT)
+INSTR1(RCP)
+INSTR1(RSQ)
+INSTR1(LOG2)
+INSTR1(EXP2)
+INSTR1(SIN)
+INSTR1(COS)
+INSTR1(SQRT)
 
 /* cat5 instructions: */
-INSTR1(5, DSX)
-INSTR1(5, DSY)
+INSTR1(DSX)
+INSTR1(DSY)
 
 static inline struct ir3_instruction *
 ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
@@ -1081,7 +1141,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
        struct ir3_instruction *sam;
        struct ir3_register *reg;
 
-       sam = ir3_instr_create(block, 5, opc);
+       sam = ir3_instr_create(block, opc);
        sam->flags |= flags;
        ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
        if (src0) {
@@ -1102,9 +1162,22 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
 }
 
 /* cat6 instructions: */
-INSTR2(6, LDLV)
-INSTR2(6, LDG)
-INSTR3(6, STG)
+INSTR2(LDLV)
+INSTR2(LDG)
+INSTR3(STG)
+INSTR3(LDGB);
+INSTR4(STGB);
+INSTR4(ATOMIC_ADD);
+INSTR4(ATOMIC_SUB);
+INSTR4(ATOMIC_XCHG);
+INSTR4(ATOMIC_INC);
+INSTR4(ATOMIC_DEC);
+INSTR4(ATOMIC_CMPXCHG);
+INSTR4(ATOMIC_MIN);
+INSTR4(ATOMIC_MAX);
+INSTR4(ATOMIC_AND);
+INSTR4(ATOMIC_OR);
+INSTR4(ATOMIC_XOR);
 
 /* ************************************************************************* */
 /* split this out or find some helper to use.. like main/bitset.h.. */