/* BI_VECTOR (bit 8): intrinsic is vectorized and should read 4 components
 * regardless of writemask */
#define BI_VECTOR (1 << 8)
+/* BI_DATA_REG_SRC (bit 9) / BI_DATA_REG_DEST (bit 10): use a data register
+ * for src0 / dest respectively, bypassing the usual register accessor.
+ * The two flags are mutually exclusive: set at most one of them. */
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
+
/* It can't get any worse than csel4... can it?
 * NOTE(review): presumably the maximum number of sources an instruction can
 * carry, sized for csel4 -- confirm against the users of this constant. */
#define BIR_SRC_COUNT 4
bool back_to_back;
bool branch_conditional;
+ /* Assigned data register */
+ unsigned data_register;
+
/* Corresponds to the usual bit but shifted by a clause */
bool data_register_write_barrier;
/* Constants read by this clause. ISA limit. */
uint64_t constants[8];
unsigned constant_count;
+
+ /* Which type of high-latency instruction this clause contains, roughly */
+ unsigned clause_type;
} bi_clause;
typedef struct bi_block {