*/
QOP_TEX_RESULT,
+ /**
+ * Insert the signal for switching threads in a threaded fragment
+ * shader. No value can be live in an accumulator across a thrsw.
+ *
+ * At the QPU level, this will have several delay slots before the
+ * switch happens. Those slots are the responsibility of the
+ * scheduler.
+ */
+ QOP_THRSW,
+
/* 32-bit immediate loaded to each SIMD channel */
QOP_LOAD_IMM,
*/
QOP_LOAD_IMM_I2,
+ QOP_ROT_MUL,
+
/* Jumps to block->successor[0] if the qinst->cond (as a
* QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note
* that block->successor[1] may be unset if the condition is ALWAYS.
enum qop op;
struct qreg dst;
- struct qreg *src;
+ struct qreg src[2];
bool sf;
+ bool cond_is_exec_mask;
uint8_t cond;
};
struct qreg execute;
struct qreg line_x, point_x, point_y;
+ /** boolean (~0 -> true) if the fragment has been discarded. */
struct qreg discard;
struct qreg payload_FRAG_Z;
struct qreg payload_FRAG_W;
struct list_head qpu_inst_list;
+ /* Pre-QPU-scheduled instruction containing the last THRSW */
+ uint64_t *last_thrsw;
+
uint64_t *qpu_insts;
uint32_t qpu_inst_count;
uint32_t qpu_inst_size;
uint32_t program_id;
uint32_t variant_id;
+
+ /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
+ * is used to hide texturing latency at the cost of limiting ourselves
+ * to the bottom half of physical reg space.
+ */
+ bool fs_threaded;
+
+ bool last_thrsw_at_top_level;
+
+ bool failed;
};
/* Special nir_load_input intrinsic index for loading the current TLB
struct qblock *qir_exit_block(struct vc4_compile *c);
struct qinst *qir_inst(enum qop op, struct qreg dst,
struct qreg src0, struct qreg src1);
-struct qinst *qir_inst4(enum qop op, struct qreg dst,
- struct qreg a,
- struct qreg b,
- struct qreg c,
- struct qreg d);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
struct qreg qir_uniform(struct vc4_compile *c,
enum quniform_contents contents,
struct qreg qir_get_temp(struct vc4_compile *c);
void qir_calculate_live_intervals(struct vc4_compile *c);
-int qir_get_op_nsrc(enum qop qop);
+int qir_get_nsrc(struct qinst *inst);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
{
struct qreg t = qir_get_temp(c);
- struct qinst *a = qir_MOV_dest(c, t, src0);
- struct qinst *b = qir_MOV_dest(c, t, src1);
- a->cond = cond;
- b->cond = qpu_cond_complement(cond);
+ qir_MOV_dest(c, t, src1);
+ qir_MOV_dest(c, t, src0)->cond = cond;
return t;
}
c->undef));
}
-static inline void
+/** Rotates the multiply output to the right by rot channels */
+static inline struct qreg
+qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
+{
+ return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,
+ val,
+ qir_reg(QFILE_LOAD_IMM,
+ QPU_SMALL_IMM_MUL_ROT + rot)));
+}
+
+static inline struct qinst *
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
struct qreg dest, struct qreg src)
{
- qir_MOV_dest(c, dest, src)->cond = cond;
+ struct qinst *mov = qir_MOV_dest(c, dest, src);
+ mov->cond = cond;
+ return mov;
}
static inline struct qinst *