/* same as before, except for the vector temporary instead of the reduce temporary */
unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
- op == fmin64 || op == fmax64;
+ op == fmin64 || op == fmax64 || op == umin64 ||
+ op == umax64 || op == imin64 || op == imax64 ||
+ op == imul64;
+ bool gfx10_need_vtmp = op == imul8 || op == imax8 || op == imin8 || op == umin8 ||
+ op == imul16 || op == imax16 || op == imin16 || op == umin16 ||
+ op == iadd64;
+
if (program->chip_class >= GFX10 && cluster_size == 64)
need_vtmp = true;
+ if (program->chip_class >= GFX10 && gfx10_need_vtmp)
+ need_vtmp = true;
+ if (program->chip_class <= GFX7)
+ need_vtmp = true;
need_vtmp |= cluster_size == 32;
+
vtmp_in_loop |= need_vtmp && block.loop_nest_depth > 0;
if (need_vtmp && (int)last_top_level_block_idx != vtmp_inserted_at) {
vtmp = {program->allocateId(), vtmp.regClass()};
instr->definitions[1] = bld.def(s2);
/* scalar identity temporary */
- bool need_sitmp = program->chip_class >= GFX10 && cluster_size == 64;
+ bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce;
if (instr->opcode == aco_opcode::p_exclusive_scan) {
need_sitmp |=
- (op == imin32 || op == imin64 || op == imax32 || op == imax64 ||
- op == fmin32 || op == fmin64 || op == fmax32 || op == fmax64 ||
- op == fmul64);
+ (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
+ op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
+ op == fmin16 || op == fmin32 || op == fmin64 ||
+ op == fmax16 || op == fmax32 || op == fmax64 ||
+ op == fmul16 || op == fmul64);
}
if (need_sitmp) {
instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size()));
}
/* vcc clobber */
- if (op == iadd32 && program->chip_class < GFX9)
- instr->definitions[4] = Definition(vcc, s2);
+ bool clobber_vcc = false;
+ if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
+ clobber_vcc = true;
+ if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+ clobber_vcc = true;
+
+ if (clobber_vcc)
+ instr->definitions[4] = Definition(vcc, bld.lm);
}
}
}