X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_reduce_assign.cpp;h=7bf7a6c3b685b9d8ddcf756782100f2200a08c0e;hb=e6366f9094326a2841058678174289827f504905;hp=28a779580a26f7f52cea75e037861973730b2e6a;hpb=56c06c79fcf32fdec67d6bc6141b6fa76a773c16;p=mesa.git diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp index 28a779580a2..7bf7a6c3b68 100644 --- a/src/amd/compiler/aco_reduce_assign.cpp +++ b/src/amd/compiler/aco_reduce_assign.cpp @@ -120,10 +120,15 @@ void setup_reduce_temp(Program* program) op == fmin64 || op == fmax64 || op == umin64 || op == umax64 || op == imin64 || op == imax64 || op == imul64; + bool gfx10_need_vtmp = op == imul8 || op == imax8 || op == imin8 || op == umin8 || + op == imul16 || op == imax16 || op == imin16 || op == umin16 || + op == iadd64; - if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute) + if (program->chip_class >= GFX10 && cluster_size == 64) need_vtmp = true; - if (program->chip_class >= GFX10 && op == iadd64) + if (program->chip_class >= GFX10 && gfx10_need_vtmp) + need_vtmp = true; + if (program->chip_class <= GFX7) need_vtmp = true; need_vtmp |= cluster_size == 32; @@ -153,12 +158,14 @@ void setup_reduce_temp(Program* program) instr->definitions[1] = bld.def(s2); /* scalar identity temporary */ - bool need_sitmp = program->chip_class >= GFX10 && cluster_size == 64; + bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce; if (instr->opcode == aco_opcode::p_exclusive_scan) { need_sitmp |= - (op == imin32 || op == imin64 || op == imax32 || op == imax64 || - op == fmin32 || op == fmin64 || op == fmax32 || op == fmax64 || - op == fmul64); + (op == imin8 || op == imin16 || op == imin32 || op == imin64 || + op == imax8 || op == imax16 || op == imax32 || op == imax64 || + op == fmin16 || op == fmin32 || op == fmin64 || + op == fmax16 || op == fmax32 || op == fmax64 || + op == fmul16 || op == fmul64); } if (need_sitmp) { instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size())); @@ -172,7 +179,7 @@ void setup_reduce_temp(Program* program) clobber_vcc = true; if (clobber_vcc) - instr->definitions[4] = Definition(vcc, s2); + instr->definitions[4] = Definition(vcc, bld.lm); } } }