struct list_head instr_list;
struct ir3_legalize_state prev_state = bd->state;
struct ir3_legalize_state *state = &bd->state;
+ bool last_input_needs_ss = false;
/* our input state is the OR of all predecessor blocks' state: */
for (unsigned i = 0; i < block->predecessors_count; i++) {
ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
}
- if (last_n && is_barrier(last_n))
+ if (last_n && is_barrier(last_n)) {
n->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+ last_input_needs_ss = false;
+ }
/* NOTE: consider dst register too.. it could happen that
* texture sample instruction (for example) writes some
*/
if (regmask_get(&state->needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
+ last_input_needs_ss = false;
regmask_init(&state->needs_ss_war);
regmask_init(&state->needs_ss);
}
reg = n->regs[0];
if (regmask_get(&state->needs_ss_war, reg)) {
n->flags |= IR3_INSTR_SS;
+ last_input_needs_ss = false;
regmask_init(&state->needs_ss_war);
regmask_init(&state->needs_ss);
}
} else if (n->opc == OPC_RESINFO) {
regmask_set(&state->needs_ss, n->regs[0]);
ir3_NOP(block)->flags |= IR3_INSTR_SS;
+ last_input_needs_ss = false;
} else if (is_load(n)) {
/* seems like ldlv needs (ss) bit instead?? which is odd but
* makes a bunch of flat-varying tests start working on a4xx.
}
}
- if (is_input(n))
+ if (is_input(n)) {
last_input = n;
+ last_input_needs_ss |= (n->opc == OPC_LDLV);
+ }
last_n = n;
}
/* (ss)bary.f (ei)r63.x, 0, r0.x */
baryf = ir3_instr_create(block, OPC_BARY_F);
- baryf->flags |= IR3_INSTR_SS;
ir3_reg_create(baryf, regid(63, 0), 0);
ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
ir3_reg_create(baryf, regid(0, 0), 0);
last_input = baryf;
}
last_input->regs[0]->flags |= IR3_REG_EI;
+ if (last_input_needs_ss)
+ last_input->flags |= IR3_INSTR_SS;
}
if (last_rel)