X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_lower_to_hw_instr.cpp;h=e8c72485772573458274512eedfbed78452a94f5;hb=a537c9e73f86d8cb3f4a2b48b4143708b146f0fe;hp=f052f7ed79224f3a2a1a5ad379a4813bb025a044;hpb=5c0f82b0d72927b98157300f5c9017f99299fbd4;p=mesa.git diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index f052f7ed792..e8c72485772 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1637,6 +1637,21 @@ void handle_operands(std::map& copy_map, lower_context* ctx->program->statistics[statistic_copies] += ctx->instructions.size() - num_instructions_before; } +void emit_set_mode(Builder& bld, float_mode new_mode, bool set_round, bool set_denorm) +{ + if (bld.program->chip_class >= GFX10) { + if (set_round) + bld.sopp(aco_opcode::s_round_mode, -1, new_mode.round); + if (set_denorm) + bld.sopp(aco_opcode::s_denorm_mode, -1, new_mode.denorm); + } else if (set_round || set_denorm) { + /* "((size - 1) << 11) | register" (MODE is encoded as register 1) */ + Instruction *instr = bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand(new_mode.val), (7 << 11) | 1).instr; + /* has to be a literal */ + instr->operands[0].setFixed(PhysReg{255}); + } +} + void lower_to_hw_instr(Program* program) { Block *discard_block = NULL; @@ -1648,28 +1663,28 @@ void lower_to_hw_instr(Program* program) ctx.program = program; Builder bld(program, &ctx.instructions); - bool set_mode = i == 0 && block->fp_mode.val != program->config->float_mode; - for (unsigned pred : block->linear_preds) { - if (program->blocks[pred].fp_mode.val != block->fp_mode.val) { - set_mode = true; - break; + float_mode config_mode; + config_mode.val = program->config->float_mode; + + bool set_round = i == 0 && block->fp_mode.round != config_mode.round; + bool set_denorm = i == 0 && block->fp_mode.denorm != config_mode.denorm; + if (block->kind & block_kind_top_level) { + for (unsigned pred : block->linear_preds) { + if (program->blocks[pred].fp_mode.round != block->fp_mode.round) + set_round = true; + if (program->blocks[pred].fp_mode.denorm != block->fp_mode.denorm) + set_denorm = true; } } - if (set_mode) { - /* only allow changing modes at top-level blocks so this doesn't break - * the "jump over empty blocks" optimization */ - assert(block->kind & block_kind_top_level); - uint32_t mode = block->fp_mode.val; - /* "((size - 1) << 11) | register" (MODE is encoded as register 1) */ - Instruction *instr = bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand(mode), (7 << 11) | 1).instr; - /* has to be a literal */ - instr->operands[0].setFixed(PhysReg{255}); - } + /* only allow changing modes at top-level blocks so this doesn't break + * the "jump over empty blocks" optimization */ + assert((!set_round && !set_denorm) || (block->kind & block_kind_top_level)); + emit_set_mode(bld, block->fp_mode, set_round, set_denorm); for (size_t j = 0; j < block->instructions.size(); j++) { aco_ptr& instr = block->instructions[j]; aco_ptr mov; - if (instr->format == Format::PSEUDO) { + if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_unit_test) { Pseudo_instruction *pi = (Pseudo_instruction*)instr.get(); switch (instr->opcode) @@ -1839,6 +1854,7 @@ void lower_to_hw_instr(Program* program) emit_gfx10_wave64_bpermute(program, instr, bld); else unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute."); + break; } default: break; @@ -1893,6 +1909,29 @@ void lower_to_hw_instr(Program* program) reduce->operands[2].physReg(), // vtmp reduce->definitions[2].physReg(), // sitmp reduce->operands[0], reduce->definitions[0]); + } else if (instr->format == Format::PSEUDO_BARRIER) { + Pseudo_barrier_instruction* barrier = static_cast(instr.get()); + + /* Anything larger than a workgroup isn't possible. Anything + * smaller requires no instructions and this pseudo instruction + * would only be included to control optimizations. */ + bool emit_s_barrier = barrier->exec_scope == scope_workgroup && + program->workgroup_size > program->wave_size; + + bld.insert(std::move(instr)); + if (emit_s_barrier) + bld.sopp(aco_opcode::s_barrier); + } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) { + float_mode new_mode = block->fp_mode; + new_mode.round16_64 = fp_round_ne; + bool set_round = new_mode.round != block->fp_mode.round; + + emit_set_mode(bld, new_mode, set_round, false); + + instr->opcode = aco_opcode::v_cvt_f16_f32; + ctx.instructions.emplace_back(std::move(instr)); + + emit_set_mode(bld, block->fp_mode, set_round, false); } else { ctx.instructions.emplace_back(std::move(instr)); }