X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_optimizer.cpp;h=0a43964457a7421700ec2fdf33eebeaa703c84df;hb=4e30191c9d3e5cdb1b65d4563f2b74e9bfdcf243;hp=0934a6f3272665fb4ba22ccd6a39cd84e91ed502;hpb=d16a7190a309ba87dc52760999dd3a6c033143ef;p=mesa.git diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0934a6f3272..0a43964457a 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -34,6 +34,27 @@ namespace aco { +#ifndef NDEBUG +void perfwarn(Program *program, bool cond, const char *msg, Instruction *instr) +{ + if (cond) { + char *out; + size_t outsize; + FILE *memf = open_memstream(&out, &outsize); + + fprintf(memf, "%s: ", msg); + aco_print_instr(instr, memf); + fclose(memf); + + aco_perfwarn(program, out); + free(out); + + if (debug_flags & DEBUG_PERFWARN) + exit(1); + } +} +#endif + /** * The optimizer works in 4 phases: * (1) The first pass collects information for each ssa-def, @@ -52,7 +73,7 @@ namespace aco { struct mad_info { aco_ptr add_instr; uint32_t mul_temp_id; - uint32_t literal_idx; + uint16_t literal_idx; bool check_literal; mad_info(aco_ptr instr, uint32_t id) @@ -83,7 +104,7 @@ enum Label { label_add_sub = 1 << 17, label_bitwise = 1 << 18, label_minmax = 1 << 19, - label_fcmp = 1 << 20, + label_vopc = 1 << 20, label_uniform_bool = 1 << 21, label_constant_64bit = 1 << 22, label_uniform_bitwise = 1 << 23, @@ -94,19 +115,19 @@ enum Label { label_constant_16bit = 1 << 29, }; -static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | - label_add_sub | label_bitwise | label_uniform_bitwise | label_minmax | label_fcmp; -static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | +static constexpr uint64_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | + label_add_sub | label_bitwise | label_uniform_bitwise | label_minmax | label_vopc; +static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_omod2 | label_omod4 | label_omod5 | label_clamp | label_scc_invert | label_b2i; -static constexpr uint32_t val_labels = label_constant_32bit | label_constant_64bit | label_constant_16bit | label_literal | label_mad; +static constexpr uint32_t val_labels = label_constant_32bit | label_constant_64bit | label_constant_16bit | label_literal; struct ssa_info { - uint32_t val; + uint64_t label; union { + uint32_t val; Temp temp; Instruction* instr; }; - uint32_t label; ssa_info() : label(0) {} @@ -116,18 +137,21 @@ struct ssa_info { * (indicating the defining instruction), there is no need to clear * any other instr labels. */ if (new_label & instr_labels) - label &= ~temp_labels; /* instr and temp alias */ + label &= ~(temp_labels | val_labels); /* instr, temp and val alias */ if (new_label & temp_labels) { label &= ~temp_labels; - label &= ~instr_labels; /* instr and temp alias */ + label &= ~(instr_labels | val_labels); /* instr, temp and val alias */ } uint32_t const_labels = label_literal | label_constant_32bit | label_constant_64bit | label_constant_16bit; - if (new_label & const_labels) + if (new_label & const_labels) { label &= ~val_labels | const_labels; - else if (new_label & val_labels) + label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */ + } else if (new_label & val_labels) { label &= ~val_labels; + label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */ + } label |= new_label; } @@ -277,7 +301,7 @@ struct ssa_info { void set_mad(Instruction* mad, uint32_t mad_info_idx) { add_label(label_mad); - val = mad_info_idx; + mad->pass_flags = mad_info_idx; instr = mad; } @@ -427,15 +451,15 @@ struct ssa_info { return label & label_minmax; } - void set_fcmp(Instruction *fcmp_instr) + void set_vopc(Instruction *vopc_instr) { - add_label(label_fcmp); - instr = fcmp_instr; + add_label(label_vopc); + instr = vopc_instr; } - bool is_fcmp() + bool is_vopc() { - return label & label_fcmp; + return label & label_vopc; } void set_scc_needed() @@ -502,6 +526,18 @@ struct opt_ctx { std::vector uses; }; +struct CmpInfo { + aco_opcode ordered; + aco_opcode unordered; + aco_opcode ordered_swapped; + aco_opcode unordered_swapped; + aco_opcode inverse; + aco_opcode f32; + unsigned size; +}; + +ALWAYS_INLINE bool get_cmp_info(aco_opcode op, CmpInfo *info); + bool can_swap_operands(aco_ptr& instr) { if (instr->operands[0].isConstant() || @@ -509,35 +545,63 @@ bool can_swap_operands(aco_ptr& instr) return false; switch (instr->opcode) { + case aco_opcode::v_add_u32: + case aco_opcode::v_add_co_u32: + case aco_opcode::v_add_co_u32_e64: + case aco_opcode::v_add_i32: + case aco_opcode::v_add_f16: case aco_opcode::v_add_f32: + case aco_opcode::v_mul_f16: case aco_opcode::v_mul_f32: case aco_opcode::v_or_b32: case aco_opcode::v_and_b32: case aco_opcode::v_xor_b32: + case aco_opcode::v_max_f16: case aco_opcode::v_max_f32: + case aco_opcode::v_min_f16: case aco_opcode::v_min_f32: case aco_opcode::v_max_i32: case aco_opcode::v_min_i32: case aco_opcode::v_max_u32: case aco_opcode::v_min_u32: - case aco_opcode::v_cmp_eq_f32: - case aco_opcode::v_cmp_lg_f32: + case aco_opcode::v_max_i16: + case aco_opcode::v_min_i16: + case aco_opcode::v_max_u16: + case aco_opcode::v_min_u16: + case aco_opcode::v_max_i16_e64: + case aco_opcode::v_min_i16_e64: + case aco_opcode::v_max_u16_e64: + case aco_opcode::v_min_u16_e64: + return true; + case aco_opcode::v_sub_f16: + instr->opcode = aco_opcode::v_subrev_f16; return true; case aco_opcode::v_sub_f32: instr->opcode = aco_opcode::v_subrev_f32; return true; - case aco_opcode::v_cmp_lt_f32: - instr->opcode = aco_opcode::v_cmp_gt_f32; + case aco_opcode::v_sub_co_u32: + instr->opcode = aco_opcode::v_subrev_co_u32; return true; - case aco_opcode::v_cmp_ge_f32: - instr->opcode = aco_opcode::v_cmp_le_f32; + case aco_opcode::v_sub_u16: + instr->opcode = aco_opcode::v_subrev_u16; return true; - case aco_opcode::v_cmp_lt_i32: - instr->opcode = aco_opcode::v_cmp_gt_i32; + case aco_opcode::v_sub_u32: + instr->opcode = aco_opcode::v_subrev_u32; return true; - default: + default: { + CmpInfo info; + get_cmp_info(instr->opcode, &info); + if (info.ordered == instr->opcode) { + instr->opcode = info.ordered_swapped; + return true; + } + if (info.unordered == instr->opcode) { + instr->opcode = info.unordered_swapped; + return true; + } return false; } + } } bool can_use_VOP3(opt_ctx& ctx, const aco_ptr& instr) @@ -672,7 +736,7 @@ bool check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand *operands) return true; } -bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp *base, uint32_t *offset) +bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp *base, uint32_t *offset, bool prevent_overflow) { Operand op = instr->operands[op_index]; @@ -694,6 +758,8 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp default: return false; } + if (prevent_overflow && !add_instr->definitions[0].isNUW()) + return false; if (add_instr->usesModifiers()) return false; @@ -711,7 +777,7 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp continue; uint32_t offset2 = 0; - if (parse_base_offset(ctx, add_instr, !i, base, &offset2)) { + if (parse_base_offset(ctx, add_instr, !i, base, &offset2, prevent_overflow)) { *offset += offset2; } else { *base = add_instr->operands[!i].getTemp(); @@ -758,7 +824,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) ASSERTED bool all_const = false; for (Operand& op : instr->operands) all_const = all_const && (!op.isTemp() || ctx.info[op.tempId()].is_constant_or_literal(32)); - perfwarn(all_const, "All instruction operands are constant", instr.get()); + perfwarn(ctx.program, all_const, "All instruction operands are constant", instr.get()); } for (unsigned i = 0; i < instr->operands.size(); i++) @@ -860,7 +926,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) unsigned bits = get_operand_size(instr, i); if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i)) { Operand op = get_constant_op(ctx, info, bits); - perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get()); + perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get()); if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) { instr->operands[i] = op; continue; @@ -884,6 +950,15 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) while (info.is_temp()) info = ctx.info[info.temp.id()]; + /* According to AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(), vaddr + * overflow for scratch accesses works only on GFX9+ and saddr overflow + * never works. Since swizzling is the only thing that separates + * scratch accesses and other accesses and swizzling changing how + * addressing works significantly, this probably applies to swizzled + * MUBUF accesses. */ + bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9; + bool saddr_prevent_overflow = mubuf->swizzled; + if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) { assert(!mubuf->idxen); instr->operands[1] = Operand(v1); @@ -894,12 +969,14 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) instr->operands[2] = Operand((uint32_t) 0); mubuf->offset += info.val; continue; - } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) { + } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) && + base.regClass() == v1 && mubuf->offset + offset < 4096) { assert(!mubuf->idxen); instr->operands[1].setTemp(base); mubuf->offset += offset; continue; - } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && mubuf->offset + offset < 4096) { + } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) && + base.regClass() == s1 && mubuf->offset + offset < 4096) { instr->operands[i].setTemp(base); mubuf->offset += offset; continue; @@ -914,7 +991,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) uint32_t offset; bool has_usable_ds_offset = ctx.program->chip_class >= GFX7; if (has_usable_ds_offset && - i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && + i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) && base.regClass() == instr->operands[i].regClass() && instr->opcode != aco_opcode::ds_swizzle_b32) { if (instr->opcode == aco_opcode::ds_write2_b32 || instr->opcode == aco_opcode::ds_read2_b32 || @@ -944,13 +1021,14 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) SMEM_instruction *smem = static_cast(instr.get()); Temp base; uint32_t offset; + bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow; if (i == 1 && info.is_constant_or_literal(32) && ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) || (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) || (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) { instr->operands[i] = Operand(info.val); continue; - } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { + } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4); if (soe && (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) || @@ -969,8 +1047,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) new_instr->operands.back() = Operand(base); if (!smem->definitions.empty()) new_instr->definitions[0] = smem->definitions[0]; - new_instr->can_reorder = smem->can_reorder; - new_instr->barrier = smem->barrier; + new_instr->sync = smem->sync; new_instr->glc = smem->glc; new_instr->dlc = smem->dlc; new_instr->nv = smem->nv; @@ -995,6 +1072,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) if (instr->definitions.empty()) return; + if ((uint16_t) instr->format & (uint16_t) Format::VOPC) { + ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get()); + return; + } + switch (instr->opcode) { case aco_opcode::p_create_vector: { bool copy_prop = instr->operands.size() == 1 && instr->operands[0].isTemp() && @@ -1312,6 +1394,14 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) ctx.info[instr->definitions[1].tempId()].set_temp(ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); ctx.info[instr->definitions[0].tempId()].set_uniform_bool(ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp()); break; + } else if (ctx.info[instr->operands[0].tempId()].is_vopc()) { + Instruction* vopc_instr = ctx.info[instr->operands[0].tempId()].instr; + /* Remove superfluous s_and when the VOPC instruction uses the same exec and thus already produces the same result */ + if (vopc_instr->pass_flags == instr->pass_flags) { + assert(instr->pass_flags > 0); + ctx.info[instr->definitions[0].tempId()].set_temp(vopc_instr->definitions[0].getTemp()); + break; + } } } /* fallthrough */ @@ -1344,28 +1434,6 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) case aco_opcode::v_max_i16: ctx.info[instr->definitions[0].tempId()].set_minmax(instr.get()); break; - #define CMP(cmp) \ - case aco_opcode::v_cmp_##cmp##_f16:\ - case aco_opcode::v_cmp_##cmp##_f32:\ - case aco_opcode::v_cmp_##cmp##_f64:\ - case aco_opcode::v_cmp_n##cmp##_f16:\ - case aco_opcode::v_cmp_n##cmp##_f32:\ - case aco_opcode::v_cmp_n##cmp##_f64: - CMP(lt) - CMP(eq) - CMP(le) - CMP(gt) - CMP(lg) - CMP(ge) - case aco_opcode::v_cmp_o_f16: - case aco_opcode::v_cmp_u_f16: - case aco_opcode::v_cmp_o_f32: - case aco_opcode::v_cmp_u_f32: - case aco_opcode::v_cmp_o_f64: - case aco_opcode::v_cmp_u_f64: - #undef CMP - ctx.info[instr->definitions[0].tempId()].set_fcmp(instr.get()); - break; case aco_opcode::s_cselect_b64: case aco_opcode::s_cselect_b32: if (instr->operands[0].constantEquals((unsigned) -1) && @@ -1390,38 +1458,34 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } } -struct CmpInfo { - aco_opcode ordered; - aco_opcode unordered; - aco_opcode inverse; - aco_opcode f32; - unsigned size; -}; - ALWAYS_INLINE bool get_cmp_info(aco_opcode op, CmpInfo *info) { info->ordered = aco_opcode::num_opcodes; info->unordered = aco_opcode::num_opcodes; + info->ordered_swapped = aco_opcode::num_opcodes; + info->unordered_swapped = aco_opcode::num_opcodes; switch (op) { - #define CMP2(ord, unord, sz) \ + #define CMP2(ord, unord, ord_swap, unord_swap, sz) \ case aco_opcode::v_cmp_##ord##_f##sz:\ case aco_opcode::v_cmp_n##unord##_f##sz:\ info->ordered = aco_opcode::v_cmp_##ord##_f##sz;\ info->unordered = aco_opcode::v_cmp_n##unord##_f##sz;\ + info->ordered_swapped = aco_opcode::v_cmp_##ord_swap##_f##sz;\ + info->unordered_swapped = aco_opcode::v_cmp_n##unord_swap##_f##sz;\ info->inverse = op == aco_opcode::v_cmp_n##unord##_f##sz ? aco_opcode::v_cmp_##unord##_f##sz : aco_opcode::v_cmp_n##ord##_f##sz;\ info->f32 = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmp_##ord##_f32 : aco_opcode::v_cmp_n##unord##_f32;\ info->size = sz;\ return true; - #define CMP(ord, unord) \ - CMP2(ord, unord, 16)\ - CMP2(ord, unord, 32)\ - CMP2(ord, unord, 64) - CMP(lt, /*n*/ge) - CMP(eq, /*n*/lg) - CMP(le, /*n*/gt) - CMP(gt, /*n*/le) - CMP(lg, /*n*/eq) - CMP(ge, /*n*/lt) + #define CMP(ord, unord, ord_swap, unord_swap) \ + CMP2(ord, unord, ord_swap, unord_swap, 16)\ + CMP2(ord, unord, ord_swap, unord_swap, 32)\ + CMP2(ord, unord, ord_swap, unord_swap, 64) + CMP(lt, /*n*/ge, gt, /*n*/le) + CMP(eq, /*n*/lg, eq, /*n*/lg) + CMP(le, /*n*/gt, ge, /*n*/lt) + CMP(gt, /*n*/le, lt, /*n*/le) + CMP(lg, /*n*/eq, lg, /*n*/eq) + CMP(ge, /*n*/lt, le, /*n*/gt) #undef CMP #undef CMP2 #define ORD_TEST(sz) \ @@ -1607,7 +1671,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) new_instr->definitions[0] = instr->definitions[0]; ctx.info[instr->definitions[0].tempId()].label = 0; - ctx.info[instr->definitions[0].tempId()].set_fcmp(new_instr); + ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); instr.reset(new_instr); @@ -1677,7 +1741,7 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr& instr) new_instr->definitions[0] = instr->definitions[0]; ctx.info[instr->definitions[0].tempId()].label = 0; - ctx.info[instr->definitions[0].tempId()].set_fcmp(new_instr); + ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); instr.reset(new_instr); @@ -1780,7 +1844,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in new_instr->definitions[0] = instr->definitions[0]; ctx.info[instr->definitions[0].tempId()].label = 0; - ctx.info[instr->definitions[0].tempId()].set_fcmp(new_instr); + ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); instr.reset(new_instr); @@ -1829,7 +1893,7 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) new_instr->definitions[0] = instr->definitions[0]; ctx.info[instr->definitions[0].tempId()].label = 0; - ctx.info[instr->definitions[0].tempId()].set_fcmp(new_instr); + ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr); instr.reset(new_instr); @@ -1927,7 +1991,7 @@ void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr& bool combine_three_valu_op(opt_ctx& ctx, aco_ptr& instr, aco_opcode op2, aco_opcode new_op, const char *shuffle, uint8_t ops) { - uint32_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & + uint64_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & (label_omod_success | label_clamp_success); for (unsigned swap = 0; swap < 2; swap++) { @@ -1958,7 +2022,7 @@ bool combine_minmax(opt_ctx& ctx, aco_ptr& instr, aco_opcode opposi if (combine_three_valu_op(ctx, instr, instr->opcode, minmax3, "012", 1 | 2)) return true; - uint32_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & + uint64_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & (label_omod_success | label_clamp_success); /* min(-max(a, b), c) -> min3(-a, -b, c) * @@ -2212,7 +2276,7 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr& instr, else return false; - uint32_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & + uint64_t omod_clamp = ctx.info[instr->definitions[0].tempId()].label & (label_omod_success | label_clamp_success); for (unsigned swap = 0; swap < 2; swap++) { @@ -2407,7 +2471,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr& instr) /* omod was successfully applied */ /* if the omod instruction is v_mad, we also have to change the original add */ if (ctx.info[instr->operands[idx].tempId()].is_mad()) { - Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[idx].tempId()].val].add_instr.get(); + Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[idx].tempId()].instr->pass_flags].add_instr.get(); if (ctx.info[instr->definitions[0].tempId()].is_clamp()) static_cast(add_instr)->clamp = true; add_instr->definitions[0] = instr->definitions[0]; @@ -2453,7 +2517,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr& instr) /* clamp was successfully applied */ /* if the clamp instruction is v_mad, we also have to change the original add */ if (ctx.info[instr->operands[idx].tempId()].is_mad()) { - Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[idx].tempId()].val].add_instr.get(); + Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[idx].tempId()].instr->pass_flags].add_instr.get(); add_instr->definitions[0] = instr->definitions[0]; } Instruction* clamp_instr = ctx.info[instr->operands[idx].tempId()].instr; @@ -2571,7 +2635,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr instr->opcode == aco_opcode::v_sub_f16 || instr->opcode == aco_opcode::v_subrev_f16; if (mad16 || mad32) { - bool need_fma = mad32 ? block.fp_mode.denorm32 != 0 : + bool need_fma = mad32 ? (block.fp_mode.denorm32 != 0 || ctx.program->chip_class >= GFX10_3) : (block.fp_mode.denorm16_64 != 0 || ctx.program->chip_class >= GFX10); if (need_fma && instr->definitions[0].isPrecise()) return; @@ -2875,7 +2939,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr& instr) mad_info* mad_info = NULL; if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) { - mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val]; + mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags]; /* re-check mad instructions */ if (ctx.uses[mad_info->mul_temp_id]) { ctx.uses[mad_info->mul_temp_id]++; @@ -3057,7 +3121,7 @@ void apply_literals(opt_ctx &ctx, aco_ptr& instr) /* apply literals on MAD */ if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) { - mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val]; + mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags]; if (info->check_literal && (ctx.uses[instr->operands[info->literal_idx].tempId()] == 0 || info->literal_idx == 2)) { aco_ptr new_mad;