Noticed in shaders with branching, where we ended up scheduling delay
slots near the start of a block for the uniforms reset setup.
total instructions in shared programs: 93970 -> 93951 (-0.02%)
instructions in affected programs: 3117 -> 3098 (-0.61%)
3DMMES performance +0.423087% +/- 0.133521% (n=9,10)
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+ /* Full immediate loads don't read any registers. */
+ if (sig == QPU_SIG_LOAD_IMM)
+ return false;
+
uint32_t src_muxes[] = {
QPU_GET_FIELD(inst, QPU_ADD_A),
QPU_GET_FIELD(inst, QPU_ADD_B),
if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
return false;
+ /* Load immediates don't read any registers. */
+ if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
+ return false;
+
for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
if (!ignore_a &&
src_regs[i].mux == QPU_MUX_A &&