Easier than making more extensive use of rpt, and the more compact
shaders seem to bring a bit of a performance boost. (Perhaps the benefits
of the repeat flag go beyond the instruction cache; possibly it saves
on instruction decode as well?)
Signed-off-by: Rob Clark <robclark@freedesktop.org>
return (instr->category == 0);
}
+static inline bool is_nop(struct ir3_instruction *instr)
+{
+ return is_flow(instr) && (instr->opc == OPC_NOP); /* true only when is_flow() accepts instr and its opcode is OPC_NOP */
+}
+
static inline bool is_alu(struct ir3_instruction *instr)
{
return (1 <= instr->category) && (instr->category <= 3);
if ((shader->instrs_count == 0) && (n->category >= 5))
ir3_instr_create(block, 0, OPC_NOP);
+ if (is_nop(n) && shader->instrs_count) {
+ struct ir3_instruction *last =
+ shader->instrs[shader->instrs_count-1];
+ if (is_nop(last) && (last->repeat < 5)) {
+ last->repeat++;
+ last->flags |= n->flags;
+ continue;
+ }
+ }
+
shader->instrs[shader->instrs_count++] = n;
if (is_sfu(n))