X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fir3%2Fir3.h;h=de7a2a887333c4e781501e3154ebb7d220516fa5;hb=86154acb5722c2eecf46324c5509e963a212ba44;hp=1391cbd97dafd1fcc3d457461fbe1b4d4fa0c2c9;hpb=19739e4fb9024f42a8fc332e6fa94c292bb6bc16;p=mesa.git diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 1391cbd97da..de7a2a88733 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -58,8 +58,14 @@ struct ir3_register { IR3_REG_CONST = 0x001, IR3_REG_IMMED = 0x002, IR3_REG_HALF = 0x004, - IR3_REG_RELATIV= 0x008, - IR3_REG_R = 0x010, + /* high registers are used for some things in compute shaders, + * for example. Seems to be for things that are global to all + * threads in a wave, so possibly these are global/shared by + * all the threads in the wave? + */ + IR3_REG_HIGH = 0x008, + IR3_REG_RELATIV= 0x010, + IR3_REG_R = 0x020, /* Most instructions, it seems, can do float abs/neg but not * integer. The CP pass needs to know what is intended (int or * float) in order to do the right thing. For this reason the @@ -68,23 +74,23 @@ struct ir3_register { * bitwise not, so split that out into a new flag to make it * more clear. */ - IR3_REG_FNEG = 0x020, - IR3_REG_FABS = 0x040, - IR3_REG_SNEG = 0x080, - IR3_REG_SABS = 0x100, - IR3_REG_BNOT = 0x200, - IR3_REG_EVEN = 0x400, - IR3_REG_POS_INF= 0x800, + IR3_REG_FNEG = 0x040, + IR3_REG_FABS = 0x080, + IR3_REG_SNEG = 0x100, + IR3_REG_SABS = 0x200, + IR3_REG_BNOT = 0x400, + IR3_REG_EVEN = 0x800, + IR3_REG_POS_INF= 0x1000, /* (ei) flag, end-input? Set on last bary, presumably to signal * that the shader needs no more input: */ - IR3_REG_EI = 0x1000, + IR3_REG_EI = 0x2000, /* meta-flags, for intermediate stages of IR, ie. * before register assignment is done: */ - IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */ - IR3_REG_ARRAY = 0x4000, - IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */ + IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ + IR3_REG_ARRAY = 0x8000, + IR3_REG_PHI_SRC= 0x10000, /* phi src, regs[0]->instr points to phi */ } flags; union { @@ -220,7 +226,7 @@ struct ir3_instruction { type_t type; int src_offset; int dst_offset; - int iim_val; + int iim_val; /* for ldgb/stgb, # of components */ } cat6; /* for meta-instructions, just used to hold extra data * before instruction scheduling, etc @@ -308,8 +314,14 @@ struct ir3_instruction { static inline struct ir3_instruction * ir3_neighbor_first(struct ir3_instruction *instr) { - while (instr->cp.left) + int cnt = 0; + while (instr->cp.left) { instr = instr->cp.left; + if (++cnt > 0xffff) { + debug_assert(0); + break; + } + } return instr; } @@ -322,12 +334,29 @@ static inline int ir3_neighbor_count(struct ir3_instruction *instr) while (instr->cp.right) { num++; instr = instr->cp.right; + if (num > 0xffff) { + debug_assert(0); + break; + } } return num; } -struct ir3_heap_chunk; +/* + * Stupid/simple growable array implementation: + */ +#define DECLARE_ARRAY(type, name) \ + unsigned name ## _count, name ## _sz; \ + type * name; + +#define array_insert(ctx, arr, val) do { \ + if (arr ## _count == arr ## _sz) { \ + arr ## _sz = MAX2(2 * arr ## _sz, 16); \ + arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ + } \ + arr[arr ##_count++] = val; \ + } while (0) struct ir3 { struct ir3_compiler *compiler; @@ -342,8 +371,7 @@ struct ir3 { * threads in a group are killed before the last bary.f gets * a chance to signal end of input (ei). */ - unsigned baryfs_count, baryfs_sz; - struct ir3_instruction **baryfs; + DECLARE_ARRAY(struct ir3_instruction *, baryfs); /* Track all indirect instructions (read and write). To avoid * deadlock scenario where an address register gets scheduled, @@ -355,36 +383,31 @@ struct ir3 { * convenient list of instructions that reference some address * register simplifies this. */ - unsigned indirects_count, indirects_sz; - struct ir3_instruction **indirects; + DECLARE_ARRAY(struct ir3_instruction *, indirects); + /* and same for instructions that consume predicate register: */ - unsigned predicates_count, predicates_sz; - struct ir3_instruction **predicates; + DECLARE_ARRAY(struct ir3_instruction *, predicates); - /* Track instructions which do not write a register but other- - * wise must not be discarded (such as kill, stg, etc) + /* Track texture sample instructions which need texture state + * patched in (for astc-srgb workaround): */ - unsigned keeps_count, keeps_sz; - struct ir3_instruction **keeps; + DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); /* List of blocks: */ struct list_head block_list; /* List of ir3_array's: */ struct list_head array_list; - - unsigned heap_idx; - struct ir3_heap_chunk *chunk; }; -typedef struct nir_variable nir_variable; +typedef struct nir_register nir_register; struct ir3_array { struct list_head node; unsigned length; unsigned id; - nir_variable *var; + nir_register *r; /* We track the last write and last access (read or write) to * setup dependencies on instructions that read or write the @@ -424,6 +447,11 @@ struct ir3_block { uint16_t start_ip, end_ip; + /* Track instructions which do not write a register but other- + * wise must not be discarded (such as kill, stg, etc) + */ + DECLARE_ARRAY(struct ir3_instruction *, keeps); + /* used for per-pass extra block data. Mainly used right * now in RA step to track livein/liveout. */ @@ -434,6 +462,16 @@ struct ir3_block { #endif }; +static inline uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(unsigned long)block; +#endif +} + struct ir3 * ir3_create(struct ir3_compiler *compiler, unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); @@ -443,10 +481,9 @@ void * ir3_alloc(struct ir3 *shader, int sz); struct ir3_block * ir3_block_create(struct ir3 *shader); -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc); +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - int category, opc_t opc, int nreg); + opc_t opc, int nreg); struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); const char *ir3_instr_name(struct ir3_instruction *instr); @@ -576,6 +613,7 @@ is_store(struct ir3_instruction *instr) */ switch (instr->opc) { case OPC_STG: + case OPC_STGB: case OPC_STP: case OPC_STL: case OPC_STLW: @@ -591,11 +629,12 @@ static inline bool is_load(struct ir3_instruction *instr) { switch (instr->opc) { case OPC_LDG: + case OPC_LDGB: case OPC_LDL: case OPC_LDP: case OPC_L2G: case OPC_LDLW: - case OPC_LDC_4: + case OPC_LDC: case OPC_LDLV: /* probably some others too.. */ return true; @@ -619,6 +658,18 @@ static inline bool is_input(struct ir3_instruction *instr) } } +static inline bool is_bool(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPS_S: + case OPC_CMPS_U: + return true; + default: + return false; + } +} + static inline bool is_meta(struct ir3_instruction *instr) { /* TODO how should we count PHI (and maybe fan-in/out) which @@ -822,14 +873,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc) } } -#define array_insert(arr, val) do { \ - if (arr ## _count == arr ## _sz) { \ - arr ## _sz = MAX2(2 * arr ## _sz, 16); \ - arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \ - } \ - arr[arr ##_count++] = val; \ - } while (0) - /* iterator for an instructions's sources (reg), also returns src #: */ #define foreach_src_n(__srcreg, __n, __instr) \ if ((__instr)->regs_count) \ @@ -878,7 +921,8 @@ void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); void ir3_depth(struct ir3 *ir); /* copy-propagate: */ -void ir3_cp(struct ir3 *ir); +struct ir3_shader_variant; +void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); /* group neighbors and insert mov's to resolve conflicts: */ void ir3_group(struct ir3 *ir); @@ -892,7 +936,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type, bool frag_coord, bool frag_face); /* legalize: */ -void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); +void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary); /* ************************************************************************* */ /* instruction helpers */ @@ -900,8 +944,7 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); static inline struct ir3_instruction * ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, OPC_MOV); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ if (src->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_register *src_reg = @@ -921,8 +964,7 @@ static inline struct ir3_instruction * ir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type, type_t dst_type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, OPC_MOV); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; instr->cat1.src_type = src_type; @@ -934,45 +976,45 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, static inline struct ir3_instruction * ir3_NOP(struct ir3_block *block) { - return ir3_instr_create(block, 0, OPC_NOP); + return ir3_instr_create(block, OPC_NOP); } -#define INSTR0(CAT, name) \ +#define INSTR0(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ return instr; \ } -#define INSTR1(CAT, name) \ +#define INSTR1(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ return instr; \ } -#define INSTR2(CAT, name) \ +#define INSTR2(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ struct ir3_instruction *b, unsigned bflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ return instr; \ } -#define INSTR3(CAT, name) \ +#define INSTR3(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ @@ -980,98 +1022,116 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *c, unsigned cflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ + return instr; \ +} + +#define INSTR4(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ + ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \ return instr; \ } /* cat0 instructions: */ -INSTR0(0, BR); -INSTR0(0, JUMP); -INSTR1(0, KILL); -INSTR0(0, END); +INSTR0(BR); +INSTR0(JUMP); +INSTR1(KILL); +INSTR0(END); /* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(2, ADD_F) -INSTR2(2, MIN_F) -INSTR2(2, MAX_F) -INSTR2(2, MUL_F) -INSTR1(2, SIGN_F) -INSTR2(2, CMPS_F) -INSTR1(2, ABSNEG_F) -INSTR2(2, CMPV_F) -INSTR1(2, FLOOR_F) -INSTR1(2, CEIL_F) -INSTR1(2, RNDNE_F) -INSTR1(2, RNDAZ_F) -INSTR1(2, TRUNC_F) -INSTR2(2, ADD_U) -INSTR2(2, ADD_S) -INSTR2(2, SUB_U) -INSTR2(2, SUB_S) -INSTR2(2, CMPS_U) -INSTR2(2, CMPS_S) -INSTR2(2, MIN_U) -INSTR2(2, MIN_S) -INSTR2(2, MAX_U) -INSTR2(2, MAX_S) -INSTR1(2, ABSNEG_S) -INSTR2(2, AND_B) -INSTR2(2, OR_B) -INSTR1(2, NOT_B) -INSTR2(2, XOR_B) -INSTR2(2, CMPV_U) -INSTR2(2, CMPV_S) -INSTR2(2, MUL_U) -INSTR2(2, MUL_S) -INSTR2(2, MULL_U) -INSTR1(2, BFREV_B) -INSTR1(2, CLZ_S) -INSTR1(2, CLZ_B) -INSTR2(2, SHL_B) -INSTR2(2, SHR_B) -INSTR2(2, ASHR_B) -INSTR2(2, BARY_F) -INSTR2(2, MGEN_B) -INSTR2(2, GETBIT_B) -INSTR1(2, SETRM) -INSTR1(2, CBITS_B) -INSTR2(2, SHB) -INSTR2(2, MSAD) +INSTR2(ADD_F) +INSTR2(MIN_F) +INSTR2(MAX_F) +INSTR2(MUL_F) +INSTR1(SIGN_F) +INSTR2(CMPS_F) +INSTR1(ABSNEG_F) +INSTR2(CMPV_F) +INSTR1(FLOOR_F) +INSTR1(CEIL_F) +INSTR1(RNDNE_F) +INSTR1(RNDAZ_F) +INSTR1(TRUNC_F) +INSTR2(ADD_U) +INSTR2(ADD_S) +INSTR2(SUB_U) +INSTR2(SUB_S) +INSTR2(CMPS_U) +INSTR2(CMPS_S) +INSTR2(MIN_U) +INSTR2(MIN_S) +INSTR2(MAX_U) +INSTR2(MAX_S) +INSTR1(ABSNEG_S) +INSTR2(AND_B) +INSTR2(OR_B) +INSTR1(NOT_B) +INSTR2(XOR_B) +INSTR2(CMPV_U) +INSTR2(CMPV_S) +INSTR2(MUL_U) +INSTR2(MUL_S) +INSTR2(MULL_U) +INSTR1(BFREV_B) +INSTR1(CLZ_S) +INSTR1(CLZ_B) +INSTR2(SHL_B) +INSTR2(SHR_B) +INSTR2(ASHR_B) +INSTR2(BARY_F) +INSTR2(MGEN_B) +INSTR2(GETBIT_B) +INSTR1(SETRM) +INSTR1(CBITS_B) +INSTR2(SHB) +INSTR2(MSAD) /* cat3 instructions: */ -INSTR3(3, MAD_U16) -INSTR3(3, MADSH_U16) -INSTR3(3, MAD_S16) -INSTR3(3, MADSH_M16) -INSTR3(3, MAD_U24) -INSTR3(3, MAD_S24) -INSTR3(3, MAD_F16) -INSTR3(3, MAD_F32) -INSTR3(3, SEL_B16) -INSTR3(3, SEL_B32) -INSTR3(3, SEL_S16) -INSTR3(3, SEL_S32) -INSTR3(3, SEL_F16) -INSTR3(3, SEL_F32) -INSTR3(3, SAD_S16) -INSTR3(3, SAD_S32) +INSTR3(MAD_U16) +INSTR3(MADSH_U16) +INSTR3(MAD_S16) +INSTR3(MADSH_M16) +INSTR3(MAD_U24) +INSTR3(MAD_S24) +INSTR3(MAD_F16) +INSTR3(MAD_F32) +INSTR3(SEL_B16) +INSTR3(SEL_B32) +INSTR3(SEL_S16) +INSTR3(SEL_S32) +INSTR3(SEL_F16) +INSTR3(SEL_F32) +INSTR3(SAD_S16) +INSTR3(SAD_S32) /* cat4 instructions: */ -INSTR1(4, RCP) -INSTR1(4, RSQ) -INSTR1(4, LOG2) -INSTR1(4, EXP2) -INSTR1(4, SIN) -INSTR1(4, COS) -INSTR1(4, SQRT) +INSTR1(RCP) +INSTR1(RSQ) +INSTR1(LOG2) +INSTR1(EXP2) +INSTR1(SIN) +INSTR1(COS) +INSTR1(SQRT) /* cat5 instructions: */ -INSTR1(5, DSX) -INSTR1(5, DSY) +INSTR1(DSX) +INSTR1(DSY) static inline struct ir3_instruction * ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, @@ -1081,7 +1141,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, struct ir3_instruction *sam; struct ir3_register *reg; - sam = ir3_instr_create(block, 5, opc); + sam = ir3_instr_create(block, opc); sam->flags |= flags; ir3_reg_create(sam, 0, 0)->wrmask = wrmask; if (src0) { @@ -1102,9 +1162,22 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, } /* cat6 instructions: */ -INSTR2(6, LDLV) -INSTR2(6, LDG) -INSTR3(6, STG) +INSTR2(LDLV) +INSTR2(LDG) +INSTR3(STG) +INSTR3(LDGB); +INSTR4(STGB); +INSTR4(ATOMIC_ADD); +INSTR4(ATOMIC_SUB); +INSTR4(ATOMIC_XCHG); +INSTR4(ATOMIC_INC); +INSTR4(ATOMIC_DEC); +INSTR4(ATOMIC_CMPXCHG); +INSTR4(ATOMIC_MIN); +INSTR4(ATOMIC_MAX); +INSTR4(ATOMIC_AND); +INSTR4(ATOMIC_OR); +INSTR4(ATOMIC_XOR); /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. */