if (instr->opc == OPC_STLW && n == 0)
return false;
+ if (instr->opc == OPC_LDLW && n == 0)
+ return false;
+
/* disallow CP into anything but the SSBO slot argument for
* atomics:
*/
if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
return false;
- /* as with atomics, ldib on a6xx can only have immediate for
- * SSBO slot argument
+ /* as with atomics, ldib and ldc on a6xx can only have immediate
+ * for SSBO slot argument
*/
- if ((instr->opc == OPC_LDIB) && (n != 0))
+ if ((instr->opc == OPC_LDIB || instr->opc == OPC_LDC) && (n != 0))
return false;
}
const_state->immediates_size += 4;
const_state->immediates = realloc (const_state->immediates,
const_state->immediates_size * sizeof(const_state->immediates[0]));
+
+ for (int i = const_state->immediate_idx; i < const_state->immediates_size * 4; i++)
+ const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0;
}
for (i = 0; i < const_state->immediate_idx; i++) {
if (!valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
- bool f_opcode = (ir3_cat2_float(instr->opc) ||
- ir3_cat3_float(instr->opc)) ? true : false;
+ bool f_opcode = (is_cat2_float(instr->opc) ||
+ is_cat3_float(instr->opc)) ? true : false;
debug_assert(new_flags & IR3_REG_IMMED);
(src_reg->array.offset == 0))
return false;
+ /* When narrowing constant from 32b to 16b, it seems
+ * to work only for float. So we should do this only with
+ * float opcodes.
+ */
+ if (src->cat1.dst_type == TYPE_F16) {
+ if (instr->opc == OPC_MOV && !type_float(instr->cat1.src_type))
+ return false;
+ if (!is_cat2_float(instr->opc) && !is_cat3_float(instr->opc))
+ return false;
+ }
+
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
return true;
} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
- bool f_opcode = (ir3_cat2_float(instr->opc) ||
- ir3_cat3_float(instr->opc)) ? true : false;
+ bool f_opcode = (is_cat2_float(instr->opc) ||
+ is_cat3_float(instr->opc)) ? true : false;
/* See if lowering an immediate to const would help. */
instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
bool progress;
do {
progress = false;
- foreach_src_n(reg, n, instr) {
+ foreach_src_n (reg, n, instr) {
struct ir3_instruction *src = ssa(reg);
if (!src)
}
}
- /* Handle converting a sam.s2en (taking samp/tex idx params via
- * register) into a normal sam (encoding immediate samp/tex idx)
- * if they are immediate. This saves some instructions and regs
- * in the common case where we know samp/tex at compile time:
+ /* Handle converting a sam.s2en (taking samp/tex idx params via register)
+ * into a normal sam (encoding immediate samp/tex idx) if they are
+ * immediate. This saves some instructions and regs in the common case
+ * where we know samp/tex at compile time. This needs to be done in the
+ * frontend for bindless tex, though, so don't replicate it here.
*/
if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
+ !(instr->flags & IR3_INSTR_B) &&
!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
/* The first src will be a collect, if both of its
* two sources are mov from imm, then we can
*/
debug_assert(instr->deps_count == 0);
- foreach_ssa_src(src, instr) {
+ foreach_ssa_src (src, instr) {
src->use_count++;
}
}
ir3_clear_mark(ir);
struct ir3_instruction *out;
- foreach_output_n(out, n, ir) {
+ foreach_output_n (out, n, ir) {
instr_cp(&ctx, out);
ir->outputs[n] = eliminate_output_mov(out);
}