#include <stdbool.h>
#include "util/u_debug.h"
+#include "util/list.h"
#include "instr-a3xx.h"
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
/* low level intermediate representation of an adreno shader program */
+struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;
struct ir3_info {
+ uint32_t gpu_id;
uint16_t sizedwords;
uint16_t instrs_count; /* expanded to account for rpt's */
/* NOTE: max_reg, etc, does not include registers not touched
int8_t max_reg; /* highest GPR # used by shader */
int8_t max_half_reg;
int16_t max_const;
+
+ /* number of sync bits: */
+ uint16_t ss, sy;
};
struct ir3_register {
IR3_REG_CONST = 0x001,
IR3_REG_IMMED = 0x002,
IR3_REG_HALF = 0x004,
- IR3_REG_RELATIV= 0x008,
- IR3_REG_R = 0x010,
+ /* high registers are used for some things in compute shaders,
+ * for example. Seems to be for things that are global to all
+ * threads in a wave, so possibly these are global/shared by
+ * all the threads in the wave?
+ */
+ IR3_REG_HIGH = 0x008,
+ IR3_REG_RELATIV= 0x010,
+ IR3_REG_R = 0x020,
/* Most instructions, it seems, can do float abs/neg but not
* integer. The CP pass needs to know what is intended (int or
* float) in order to do the right thing. For this reason the
* bitwise not, so split that out into a new flag to make it
* more clear.
*/
- IR3_REG_FNEG = 0x020,
- IR3_REG_FABS = 0x040,
- IR3_REG_SNEG = 0x080,
- IR3_REG_SABS = 0x100,
- IR3_REG_BNOT = 0x200,
- IR3_REG_EVEN = 0x400,
- IR3_REG_POS_INF= 0x800,
+ IR3_REG_FNEG = 0x040,
+ IR3_REG_FABS = 0x080,
+ IR3_REG_SNEG = 0x100,
+ IR3_REG_SABS = 0x200,
+ IR3_REG_BNOT = 0x400,
+ IR3_REG_EVEN = 0x800,
+ IR3_REG_POS_INF= 0x1000,
/* (ei) flag, end-input? Set on last bary, presumably to signal
* that the shader needs no more input:
*/
- IR3_REG_EI = 0x1000,
+ IR3_REG_EI = 0x2000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
- IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
- IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */
- IR3_REG_ADDR = 0x8000, /* register is a0.x */
+ IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */
+ IR3_REG_ARRAY = 0x8000,
+
} flags;
union {
/* normal registers:
uint32_t uim_val;
float fim_val;
/* relative: */
- int offset;
+ struct {
+ uint16_t id;
+ int16_t offset;
+ } array;
};
- /* for IR3_REG_SSA, src registers contain ptr back to
- * assigning instruction.
+ /* For IR3_REG_SSA, src registers contain ptr back to assigning
+ * instruction.
+ *
+ * For IR3_REG_ARRAY, the pointer is back to the last dependent
+ * array access (although the net effect is the same, it points
+ * back to a previous instruction that we depend on).
*/
struct ir3_instruction *instr;
};
};
+/*
+ * Stupid/simple growable array implementation:
+ */
+#define DECLARE_ARRAY(type, name) \
+ unsigned name ## _count, name ## _sz; \
+ type * name;
+
+#define array_insert(ctx, arr, val) do { \
+ if (arr ## _count == arr ## _sz) { \
+ arr ## _sz = MAX2(2 * arr ## _sz, 16); \
+ arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \
+ } \
+ arr[arr ##_count++] = val; \
+ } while (0)
+
struct ir3_instruction {
struct ir3_block *block;
- int category;
opc_t opc;
enum {
/* (sy) flag is set on first instruction, and after sample
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
+ IR3_INSTR_G = 0x400,
+ IR3_INSTR_SAT = 0x800,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_INSTR_MARK = 0x1000,
+ IR3_INSTR_UNUSED= 0x2000,
} flags;
int repeat;
#ifdef DEBUG
char inv;
char comp;
int immed;
+ struct ir3_block *target;
} cat0;
struct {
type_t src_type, dst_type;
} cat5;
struct {
type_t type;
- int offset;
- int iim_val;
+ int src_offset;
+ int dst_offset;
+ int iim_val : 3; /* for ldgb/stgb, # of components */
+ int d : 3;
+ bool typed : 1;
} cat6;
+ struct {
+ unsigned w : 1; /* write */
+ unsigned r : 1; /* read */
+ unsigned l : 1; /* local */
+ unsigned g : 1; /* global */
+ } cat7;
/* for meta-instructions, just used to hold extra data
* before instruction scheduling, etc
*/
struct {
int off; /* component/offset */
} fo;
- struct {
- int aid;
- } fi;
- struct {
- struct ir3_block *if_block, *else_block;
- } flow;
struct {
struct ir3_block *block;
} inout;
-
- /* XXX keep this as big as all other union members! */
- uint32_t info[3];
};
/* transient values used during various algorithms: */
* result of moving a const to a reg would have a low cost, so to
* it could make sense to duplicate the instruction at various
* points where the result is needed to reduce register footprint.
- *
- * DEPTH_UNUSED used to mark unused instructions after depth
- * calculation pass.
*/
-#define DEPTH_UNUSED ~0
unsigned depth;
+ /* When we get to the RA stage, we no longer need depth, but
+ * we do need instruction's position/name:
+ */
+ struct {
+ uint16_t ip;
+ uint16_t name;
+ };
};
+ /* used for per-pass extra instruction data.
+ */
+ void *data;
+
/* Used during CP and RA stages. For fanin and shader inputs/
* outputs where we need a sequence of consecutive registers,
* keep track of each src instructions left (ie 'n-1') and right
/* an instruction can reference at most one address register amongst
* it's src/dst registers. Beyond that, you need to insert mov's.
+ *
+ * NOTE: do not write this directly, use ir3_instr_set_address()
*/
struct ir3_instruction *address;
- /* in case of a instruction with relative dst instruction, we need to
- * capture the dependency on the fanin for the previous values of
- * the array elements. Since we don't know at compile time actually
- * which array elements are written, this serves to preserve the
- * unconditional write to array elements prior to the conditional
- * write.
+ /* Tracking for additional dependent instructions. Used to handle
+ * barriers, WAR hazards for arrays/SSBOs/etc.
+ */
+ DECLARE_ARRAY(struct ir3_instruction *, deps);
+
+ /*
+ * From PoV of instruction scheduling, not execution (ie. ignores global/
+ * local distinction):
+ * shared image atomic SSBO everything
+ * barrier()/ - R/W R/W R/W R/W X
+ * groupMemoryBarrier()
+ * memoryBarrier() - R/W R/W
+ * (but only images declared coherent?)
+ * memoryBarrierAtomic() - R/W
+ * memoryBarrierBuffer() - R/W
+ * memoryBarrierImage() - R/W
+ * memoryBarrierShared() - R/W
*
- * TODO only cat1 can do indirect write.. we could maybe move this
- * into instr->cat1.fanin (but would require the frontend to insert
- * the extra mov)
+ * TODO I think for SSBO/image/shared, in cases where we can determine
+ * which variable is accessed, we don't need to care about accesses to
+ * different variables (unless declared coherent??)
*/
- struct ir3_instruction *fanin;
+ enum {
+ IR3_BARRIER_EVERYTHING = 1 << 0,
+ IR3_BARRIER_SHARED_R = 1 << 1,
+ IR3_BARRIER_SHARED_W = 1 << 2,
+ IR3_BARRIER_IMAGE_R = 1 << 3,
+ IR3_BARRIER_IMAGE_W = 1 << 4,
+ IR3_BARRIER_BUFFER_R = 1 << 5,
+ IR3_BARRIER_BUFFER_W = 1 << 6,
+ IR3_BARRIER_ARRAY_R = 1 << 7,
+ IR3_BARRIER_ARRAY_W = 1 << 8,
+ } barrier_class, barrier_conflict;
+
+ /* Entry in ir3_block's instruction list: */
+ struct list_head node;
+
+ int use_count; /* currently just updated/used by cp */
- struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
#endif
static inline struct ir3_instruction *
ir3_neighbor_first(struct ir3_instruction *instr)
{
- while (instr->cp.left)
+ int cnt = 0;
+ while (instr->cp.left) {
instr = instr->cp.left;
+ if (++cnt > 0xffff) {
+ debug_assert(0);
+ break;
+ }
+ }
return instr;
}
while (instr->cp.right) {
num++;
instr = instr->cp.right;
+ if (num > 0xffff) {
+ debug_assert(0);
+ break;
+ }
}
return num;
}
-struct ir3_heap_chunk;
-
struct ir3 {
- unsigned instrs_count, instrs_sz;
- struct ir3_instruction **instrs;
+ struct ir3_compiler *compiler;
+
+ unsigned ninputs, noutputs;
+ struct ir3_instruction **inputs;
+ struct ir3_instruction **outputs;
/* Track bary.f (and ldlv) instructions.. this is needed in
* scheduling to ensure that all varying fetches happen before
* threads in a group are killed before the last bary.f gets
* a chance to signal end of input (ei).
*/
- unsigned baryfs_count, baryfs_sz;
- struct ir3_instruction **baryfs;
+ DECLARE_ARRAY(struct ir3_instruction *, baryfs);
/* Track all indirect instructions (read and write). To avoid
* deadlock scenario where an address register gets scheduled,
* convenient list of instructions that reference some address
* register simplifies this.
*/
- unsigned indirects_count, indirects_sz;
- struct ir3_instruction **indirects;
+ DECLARE_ARRAY(struct ir3_instruction *, indirects);
- struct ir3_block *block;
- unsigned heap_idx;
- struct ir3_heap_chunk *chunk;
+ /* and same for instructions that consume predicate register: */
+ DECLARE_ARRAY(struct ir3_instruction *, predicates);
+
+ /* Track texture sample instructions which need texture state
+ * patched in (for astc-srgb workaround):
+ */
+ DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);
+
+ /* List of blocks: */
+ struct list_head block_list;
+
+ /* List of ir3_array's: */
+ struct list_head array_list;
+
+#ifdef DEBUG
+ unsigned block_count, instr_count;
+#endif
+};
+
+typedef struct nir_register nir_register;
+
+struct ir3_array {
+ struct list_head node;
+ unsigned length;
+ unsigned id;
+
+ nir_register *r;
+
+ /* To avoid array write's from getting DCE'd, keep track of the
+ * most recent write. Any array access depends on the most
+ * recent write. This way, nothing depends on writes after the
+ * last read. But all the writes that happen before that have
+ * something depending on them
+ */
+ struct ir3_instruction *last_write;
+
+ /* extra stuff used in RA pass: */
+ unsigned base; /* base vreg name */
+ unsigned reg; /* base physical reg */
+ uint16_t start_ip, end_ip;
};
+struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
+
+typedef struct nir_block nir_block;
+
struct ir3_block {
+ struct list_head node;
struct ir3 *shader;
- unsigned ntemporaries, ninputs, noutputs;
- /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
- struct ir3_instruction **temporaries;
- struct ir3_instruction **inputs;
- struct ir3_instruction **outputs;
- /* only a single address register: */
- struct ir3_instruction *address;
- struct ir3_block *parent;
- struct ir3_instruction *head;
+
+ const nir_block *nblock;
+
+ struct list_head instr_list; /* list of ir3_instruction */
+
+ /* each block has either one or two successors.. in case of
+ * two successors, 'condition' decides which one to follow.
+ * A block preceding an if/else has two successors.
+ */
+ struct ir3_instruction *condition;
+ struct ir3_block *successors[2];
+
+ unsigned predecessors_count;
+ struct ir3_block **predecessors;
+
+ uint16_t start_ip, end_ip;
+
+ /* Track instructions which do not write a register but other-
+ * wise must not be discarded (such as kill, stg, etc)
+ */
+ DECLARE_ARRAY(struct ir3_instruction *, keeps);
+
+ /* used for per-pass extra block data. Mainly used right
+ * now in RA step to track livein/liveout.
+ */
+ void *data;
+
+#ifdef DEBUG
+ uint32_t serialno;
+#endif
};
-struct ir3 * ir3_create(void);
+static inline uint32_t
+block_id(struct ir3_block *block)
+{
+#ifdef DEBUG
+ return block->serialno;
+#else
+ return (uint32_t)(unsigned long)block;
+#endif
+}
+
+struct ir3 * ir3_create(struct ir3_compiler *compiler,
+ unsigned nin, unsigned nout);
void ir3_destroy(struct ir3 *shader);
void * ir3_assemble(struct ir3 *shader,
struct ir3_info *info, uint32_t gpu_id);
void * ir3_alloc(struct ir3 *shader, int sz);
-struct ir3_block * ir3_block_create(struct ir3 *shader,
- unsigned ntmp, unsigned nin, unsigned nout);
+struct ir3_block * ir3_block_create(struct ir3 *shader);
-struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
- int category, opc_t opc);
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc);
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
- int category, opc_t opc, int nreg);
+ opc_t opc, int nreg);
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep);
const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+struct ir3_register * ir3_reg_clone(struct ir3 *shader,
+ struct ir3_register *reg);
+void ir3_instr_set_address(struct ir3_instruction *instr,
+ struct ir3_instruction *addr);
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
return false;
}
-static inline void ir3_clear_mark(struct ir3 *shader)
-{
- /* TODO would be nice to drop the instruction array.. for
- * new compiler, _clear_mark() is all we use it for, and
- * we could probably manage a linked list instead..
- *
- * Also, we'll probably want to mark instructions within
- * a block, so tracking the list of instrs globally is
- * unlikely to be what we want.
- */
- unsigned i;
- for (i = 0; i < shader->instrs_count; i++) {
- struct ir3_instruction *instr = shader->instrs[i];
- instr->flags &= ~IR3_INSTR_MARK;
- }
-}
+void ir3_block_clear_mark(struct ir3_block *block);
+void ir3_clear_mark(struct ir3 *shader);
+
+unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
static inline bool is_flow(struct ir3_instruction *instr)
{
- return (instr->category == 0);
+ return (opc_cat(instr->opc) == 0);
}
static inline bool is_kill(struct ir3_instruction *instr)
{
- return is_flow(instr) && (instr->opc == OPC_KILL);
+ return instr->opc == OPC_KILL;
}
static inline bool is_nop(struct ir3_instruction *instr)
{
- return is_flow(instr) && (instr->opc == OPC_NOP);
+ return instr->opc == OPC_NOP;
}
/* Is it a non-transformative (ie. not type changing) mov? This can
*/
static inline bool is_same_type_mov(struct ir3_instruction *instr)
{
- struct ir3_register *dst = instr->regs[0];
+ struct ir3_register *dst;
+
+ switch (instr->opc) {
+ case OPC_MOV:
+ if (instr->cat1.src_type != instr->cat1.dst_type)
+ return false;
+ break;
+ case OPC_ABSNEG_F:
+ case OPC_ABSNEG_S:
+ if (instr->flags & IR3_INSTR_SAT)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ dst = instr->regs[0];
/* mov's that write to a0.x or p0.x are special: */
if (dst->num == regid(REG_P0, 0))
if (dst->num == regid(REG_A0, 0))
return false;
- if ((instr->category == 1) &&
- (instr->cat1.src_type == instr->cat1.dst_type))
- return true;
- if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
- (instr->opc == OPC_ABSNEG_S)))
- return true;
- return false;
+ if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
+ return false;
+
+ return true;
}
static inline bool is_alu(struct ir3_instruction *instr)
{
- return (1 <= instr->category) && (instr->category <= 3);
+ return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
}
static inline bool is_sfu(struct ir3_instruction *instr)
{
- return (instr->category == 4);
+ return (opc_cat(instr->opc) == 4);
}
static inline bool is_tex(struct ir3_instruction *instr)
{
- return (instr->category == 5);
+ return (opc_cat(instr->opc) == 5);
}
static inline bool is_mem(struct ir3_instruction *instr)
{
- return (instr->category == 6);
+ return (opc_cat(instr->opc) == 6);
+}
+
+static inline bool is_barrier(struct ir3_instruction *instr)
+{
+ return (opc_cat(instr->opc) == 7);
+}
+
+static inline bool
+is_store(struct ir3_instruction *instr)
+{
+ /* these instructions, the "destination" register is
+ * actually a source, the address to store to.
+ */
+ switch (instr->opc) {
+ case OPC_STG:
+ case OPC_STGB:
+ case OPC_STIB:
+ case OPC_STP:
+ case OPC_STL:
+ case OPC_STLW:
+ case OPC_L2G:
+ case OPC_G2L:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_load(struct ir3_instruction *instr)
+{
+ switch (instr->opc) {
+ case OPC_LDG:
+ case OPC_LDGB:
+ case OPC_LDL:
+ case OPC_LDP:
+ case OPC_L2G:
+ case OPC_LDLW:
+ case OPC_LDC:
+ case OPC_LDLV:
+ /* probably some others too.. */
+ return true;
+ default:
+ return false;
+ }
}
static inline bool is_input(struct ir3_instruction *instr)
* interpolation.. fortunately inloc is the first src
* register in either case
*/
- if (is_mem(instr) && (instr->opc == OPC_LDLV))
+ switch (instr->opc) {
+ case OPC_LDLV:
+ case OPC_BARY_F:
return true;
- return (instr->category == 2) && (instr->opc == OPC_BARY_F);
+ default:
+ return false;
+ }
+}
+
+static inline bool is_bool(struct ir3_instruction *instr)
+{
+ switch (instr->opc) {
+ case OPC_CMPS_F:
+ case OPC_CMPS_S:
+ case OPC_CMPS_U:
+ return true;
+ default:
+ return false;
+ }
}
static inline bool is_meta(struct ir3_instruction *instr)
* might actually contribute some instructions to the final
* result?
*/
- return (instr->category == -1);
+ return (opc_cat(instr->opc) == -1);
}
static inline bool writes_addr(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
- return !!(dst->flags & IR3_REG_ADDR);
+ return reg_num(dst) == REG_A0;
}
return false;
}
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
- if (reg->flags & IR3_REG_SSA)
+ if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
+ debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
return reg->instr;
+ }
return NULL;
}
static inline bool reg_gpr(struct ir3_register *r)
{
- if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
+ if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
return true;
}
+static inline type_t half_type(type_t type)
+{
+ switch (type) {
+ case TYPE_F32: return TYPE_F16;
+ case TYPE_U32: return TYPE_U16;
+ case TYPE_S32: return TYPE_S16;
+ case TYPE_F16:
+ case TYPE_U16:
+ case TYPE_S16:
+ return type;
+ default:
+ assert(0);
+ return ~0;
+ }
+}
+
/* some cat2 instructions (ie. those which are not float) can embed an
* immediate:
*/
}
}
-#define array_insert(arr, val) do { \
- if (arr ## _count == arr ## _sz) { \
- arr ## _sz = MAX2(2 * arr ## _sz, 16); \
- arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
- } \
- arr[arr ##_count++] = val; \
- } while (0)
+#define MASK(n) ((1 << (n)) - 1)
/* iterator for an instructions's sources (reg), also returns src #: */
#define foreach_src_n(__srcreg, __n, __instr) \
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
- if (instr->fanin)
- return instr->regs_count + 2;
+ unsigned cnt = instr->regs_count + instr->deps_count;
if (instr->address)
- return instr->regs_count + 1;
- return instr->regs_count;
+ cnt++;
+ return cnt;
}
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
- if (n == (instr->regs_count + 1))
- return instr->fanin;
- if (n == (instr->regs_count + 0))
+ if (n == (instr->regs_count + instr->deps_count))
return instr->address;
+ if (n >= instr->regs_count)
+ return instr->deps[n - instr->regs_count];
return ssa(instr->regs[n]);
}
+static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
+{
+ if (n == (instr->regs_count + instr->deps_count))
+ return false;
+ if (n >= instr->regs_count)
+ return true;
+ return false;
+}
+
#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)
/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
- if ((__instr)->regs_count) \
- for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
- if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
+ for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
+ if ((__srcinst = __ssa_src_n(__instr, __n)))
/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
/* dump: */
-#include <stdio.h>
-void ir3_dump(struct ir3 *shader, const char *name,
- struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
- FILE *f);
-void ir3_dump_instr_single(struct ir3_instruction *instr);
-void ir3_dump_instr_list(struct ir3_instruction *instr);
-
-/* flatten if/else: */
-int ir3_block_flatten(struct ir3_block *block);
+void ir3_print(struct ir3 *ir);
+void ir3_print_instr(struct ir3_instruction *instr);
/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
-void ir3_block_depth(struct ir3_block *block);
+void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
+void ir3_depth(struct ir3 *ir);
/* copy-propagate: */
-void ir3_block_cp(struct ir3_block *block);
+struct ir3_shader_variant;
+void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
-/* group neightbors and insert mov's to resolve conflicts: */
-void ir3_block_group(struct ir3_block *block);
+/* group neighbors and insert mov's to resolve conflicts: */
+void ir3_group(struct ir3 *ir);
/* scheduling: */
-int ir3_block_sched(struct ir3_block *block);
+void ir3_sched_add_deps(struct ir3 *ir);
+int ir3_sched(struct ir3 *ir);
/* register assignment: */
-int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx);
+int ir3_ra(struct ir3 *ir3, enum shader_t type,
bool frag_coord, bool frag_face);
/* legalize: */
-void ir3_block_legalize(struct ir3_block *block,
- bool *has_samp, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary);
/* ************************************************************************* */
/* instruction helpers */
static inline struct ir3_instruction *
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
{
- struct ir3_instruction *instr =
- ir3_instr_create(block, 1, 0);
+ struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
ir3_reg_create(instr, 0, 0); /* dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+ if (src->regs[0]->flags & IR3_REG_ARRAY) {
+ struct ir3_register *src_reg =
+ ir3_reg_create(instr, 0, IR3_REG_ARRAY);
+ src_reg->array = src->regs[0]->array;
+ src_reg->instr = src;
+ } else {
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+ }
+ debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;
instr->cat1.dst_type = type;
return instr;
ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
type_t src_type, type_t dst_type)
{
- struct ir3_instruction *instr =
- ir3_instr_create(block, 1, 0);
+ struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
ir3_reg_create(instr, 0, 0); /* dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = src_type;
instr->cat1.dst_type = dst_type;
+ debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
return instr;
}
-#define INSTR1(CAT, name) \
+static inline struct ir3_instruction *
+ir3_NOP(struct ir3_block *block)
+{
+ return ir3_instr_create(block, OPC_NOP);
+}
+
+#define INSTR0(name) \
+static inline struct ir3_instruction * \
+ir3_##name(struct ir3_block *block) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, OPC_##name); \
+ return instr; \
+}
+
+#define INSTR1(name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags) \
{ \
struct ir3_instruction *instr = \
- ir3_instr_create(block, CAT, OPC_##name); \
+ ir3_instr_create(block, OPC_##name); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
return instr; \
}
-#define INSTR2(CAT, name) \
+#define INSTR2(name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags) \
{ \
struct ir3_instruction *instr = \
- ir3_instr_create(block, CAT, OPC_##name); \
+ ir3_instr_create(block, OPC_##name); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
return instr; \
}
-#define INSTR3(CAT, name) \
+#define INSTR3(name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *c, unsigned cflags) \
{ \
struct ir3_instruction *instr = \
- ir3_instr_create(block, CAT, OPC_##name); \
+ ir3_instr_create(block, OPC_##name); \
+ ir3_reg_create(instr, 0, 0); /* dst */ \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
+ return instr; \
+}
+
+#define INSTR4(name) \
+static inline struct ir3_instruction * \
+ir3_##name(struct ir3_block *block, \
+ struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, \
+ struct ir3_instruction *c, unsigned cflags, \
+ struct ir3_instruction *d, unsigned dflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create2(block, OPC_##name, 5); \
+ ir3_reg_create(instr, 0, 0); /* dst */ \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \
+ return instr; \
+}
+
+#define INSTR4F(f, name) \
+static inline struct ir3_instruction * \
+ir3_##name##_##f(struct ir3_block *block, \
+ struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, \
+ struct ir3_instruction *c, unsigned cflags, \
+ struct ir3_instruction *d, unsigned dflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create2(block, OPC_##name, 5); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
+ ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \
+ instr->flags |= IR3_INSTR_##f; \
return instr; \
}
/* cat0 instructions: */
-INSTR1(0, KILL);
+INSTR0(BR)
+INSTR0(JUMP)
+INSTR1(KILL)
+INSTR0(END)
/* cat2 instructions, most 2 src but some 1 src: */
-INSTR2(2, ADD_F)
-INSTR2(2, MIN_F)
-INSTR2(2, MAX_F)
-INSTR2(2, MUL_F)
-INSTR1(2, SIGN_F)
-INSTR2(2, CMPS_F)
-INSTR1(2, ABSNEG_F)
-INSTR2(2, CMPV_F)
-INSTR1(2, FLOOR_F)
-INSTR1(2, CEIL_F)
-INSTR1(2, RNDNE_F)
-INSTR1(2, RNDAZ_F)
-INSTR1(2, TRUNC_F)
-INSTR2(2, ADD_U)
-INSTR2(2, ADD_S)
-INSTR2(2, SUB_U)
-INSTR2(2, SUB_S)
-INSTR2(2, CMPS_U)
-INSTR2(2, CMPS_S)
-INSTR2(2, MIN_U)
-INSTR2(2, MIN_S)
-INSTR2(2, MAX_U)
-INSTR2(2, MAX_S)
-INSTR1(2, ABSNEG_S)
-INSTR2(2, AND_B)
-INSTR2(2, OR_B)
-INSTR1(2, NOT_B)
-INSTR2(2, XOR_B)
-INSTR2(2, CMPV_U)
-INSTR2(2, CMPV_S)
-INSTR2(2, MUL_U)
-INSTR2(2, MUL_S)
-INSTR2(2, MULL_U)
-INSTR1(2, BFREV_B)
-INSTR1(2, CLZ_S)
-INSTR1(2, CLZ_B)
-INSTR2(2, SHL_B)
-INSTR2(2, SHR_B)
-INSTR2(2, ASHR_B)
-INSTR2(2, BARY_F)
-INSTR2(2, MGEN_B)
-INSTR2(2, GETBIT_B)
-INSTR1(2, SETRM)
-INSTR1(2, CBITS_B)
-INSTR2(2, SHB)
-INSTR2(2, MSAD)
+INSTR2(ADD_F)
+INSTR2(MIN_F)
+INSTR2(MAX_F)
+INSTR2(MUL_F)
+INSTR1(SIGN_F)
+INSTR2(CMPS_F)
+INSTR1(ABSNEG_F)
+INSTR2(CMPV_F)
+INSTR1(FLOOR_F)
+INSTR1(CEIL_F)
+INSTR1(RNDNE_F)
+INSTR1(RNDAZ_F)
+INSTR1(TRUNC_F)
+INSTR2(ADD_U)
+INSTR2(ADD_S)
+INSTR2(SUB_U)
+INSTR2(SUB_S)
+INSTR2(CMPS_U)
+INSTR2(CMPS_S)
+INSTR2(MIN_U)
+INSTR2(MIN_S)
+INSTR2(MAX_U)
+INSTR2(MAX_S)
+INSTR1(ABSNEG_S)
+INSTR2(AND_B)
+INSTR2(OR_B)
+INSTR1(NOT_B)
+INSTR2(XOR_B)
+INSTR2(CMPV_U)
+INSTR2(CMPV_S)
+INSTR2(MUL_U)
+INSTR2(MUL_S)
+INSTR2(MULL_U)
+INSTR1(BFREV_B)
+INSTR1(CLZ_S)
+INSTR1(CLZ_B)
+INSTR2(SHL_B)
+INSTR2(SHR_B)
+INSTR2(ASHR_B)
+INSTR2(BARY_F)
+INSTR2(MGEN_B)
+INSTR2(GETBIT_B)
+INSTR1(SETRM)
+INSTR1(CBITS_B)
+INSTR2(SHB)
+INSTR2(MSAD)
/* cat3 instructions: */
-INSTR3(3, MAD_U16)
-INSTR3(3, MADSH_U16)
-INSTR3(3, MAD_S16)
-INSTR3(3, MADSH_M16)
-INSTR3(3, MAD_U24)
-INSTR3(3, MAD_S24)
-INSTR3(3, MAD_F16)
-INSTR3(3, MAD_F32)
-INSTR3(3, SEL_B16)
-INSTR3(3, SEL_B32)
-INSTR3(3, SEL_S16)
-INSTR3(3, SEL_S32)
-INSTR3(3, SEL_F16)
-INSTR3(3, SEL_F32)
-INSTR3(3, SAD_S16)
-INSTR3(3, SAD_S32)
+INSTR3(MAD_U16)
+INSTR3(MADSH_U16)
+INSTR3(MAD_S16)
+INSTR3(MADSH_M16)
+INSTR3(MAD_U24)
+INSTR3(MAD_S24)
+INSTR3(MAD_F16)
+INSTR3(MAD_F32)
+INSTR3(SEL_B16)
+INSTR3(SEL_B32)
+INSTR3(SEL_S16)
+INSTR3(SEL_S32)
+INSTR3(SEL_F16)
+INSTR3(SEL_F32)
+INSTR3(SAD_S16)
+INSTR3(SAD_S32)
/* cat4 instructions: */
-INSTR1(4, RCP)
-INSTR1(4, RSQ)
-INSTR1(4, LOG2)
-INSTR1(4, EXP2)
-INSTR1(4, SIN)
-INSTR1(4, COS)
-INSTR1(4, SQRT)
+INSTR1(RCP)
+INSTR1(RSQ)
+INSTR1(LOG2)
+INSTR1(EXP2)
+INSTR1(SIN)
+INSTR1(COS)
+INSTR1(SQRT)
/* cat5 instructions: */
-INSTR1(5, DSX)
-INSTR1(5, DSY)
+INSTR1(DSX)
+INSTR1(DSY)
static inline struct ir3_instruction *
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
struct ir3_instruction *sam;
struct ir3_register *reg;
- sam = ir3_instr_create(block, 5, opc);
+ sam = ir3_instr_create(block, opc);
sam->flags |= flags;
ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
if (src0) {
}
/* cat6 instructions: */
-INSTR2(6, LDLV)
-INSTR2(6, LDG)
+INSTR2(LDLV)
+INSTR2(LDG)
+INSTR2(LDL)
+INSTR3(STG)
+INSTR3(STL)
+INSTR3(LDGB)
+INSTR4(STGB)
+INSTR4(STIB)
+INSTR1(RESINFO)
+INSTR1(RESFMT)
+INSTR2(ATOMIC_ADD)
+INSTR2(ATOMIC_SUB)
+INSTR2(ATOMIC_XCHG)
+INSTR2(ATOMIC_INC)
+INSTR2(ATOMIC_DEC)
+INSTR2(ATOMIC_CMPXCHG)
+INSTR2(ATOMIC_MIN)
+INSTR2(ATOMIC_MAX)
+INSTR2(ATOMIC_AND)
+INSTR2(ATOMIC_OR)
+INSTR2(ATOMIC_XOR)
+INSTR4F(G, ATOMIC_ADD)
+INSTR4F(G, ATOMIC_SUB)
+INSTR4F(G, ATOMIC_XCHG)
+INSTR4F(G, ATOMIC_INC)
+INSTR4F(G, ATOMIC_DEC)
+INSTR4F(G, ATOMIC_CMPXCHG)
+INSTR4F(G, ATOMIC_MIN)
+INSTR4F(G, ATOMIC_MAX)
+INSTR4F(G, ATOMIC_AND)
+INSTR4F(G, ATOMIC_OR)
+INSTR4F(G, ATOMIC_XOR)
+
+/* cat7 instructions: */
+INSTR0(BAR)
+INSTR0(FENCE)
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */
static inline unsigned regmask_idx(struct ir3_register *reg)
{
- unsigned num = reg->num;
+ unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
debug_assert(num < MAX_REG);
if (reg->flags & IR3_REG_HALF)
num += MAX_REG;