X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Famd%2Fcompiler%2Faco_instruction_selection.cpp;h=f156acc535aa15b4be32cc96bf21663970e87057;hb=14d748eb28efa57507a3a84b7ef157b27ab27752;hp=6f1f8b4e07e701d59d173dd70c4a87f0eeb497bb;hpb=9c46e6fca323390f3cb74d6e865d2883a4fbd453;p=mesa.git diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 6f1f8b4e07e..f156acc535a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1793,84 +1793,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } break; } - case nir_op_fmax3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_fmin3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_fmed3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umax3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umin3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umed3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imax3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imin3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imed3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } case nir_op_cube_face_coord: { Temp in = get_alu_src(ctx, instr->src[0], 3); Temp src[3] = { emit_extract_vector(ctx, in, 0, v1), @@ -2839,51 +2761,32 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp offset = get_alu_src(ctx, instr->src[1]); Temp bits = get_alu_src(ctx, instr->src[2]); + if (dst.bytes() != 4) + unreachable("Unsupported BFE bit size"); + if (dst.type() == RegType::sgpr) { - Operand extract; nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src); nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src); if (const_offset && const_bits) { - uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32; - extract = Operand(const_extract); + uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); + aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; + bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract)); + } else if (instr->op == nir_op_ubfe) { + Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); + Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask); + bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset); } else { - Operand width; - if (const_bits) { - width = Operand(const_bits->u32 << 16); - } else { - width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u)); - } - extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width); - } + Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) : + bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u)); + Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) : + bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu)); - aco_opcode opcode; - if (dst.regClass() == s1) { - if (instr->op == nir_op_ubfe) - opcode = aco_opcode::s_bfe_u32; - else - opcode = aco_opcode::s_bfe_i32; - } else if (dst.regClass() == s2) { - if (instr->op == nir_op_ubfe) - opcode = aco_opcode::s_bfe_u64; - else - opcode = aco_opcode::s_bfe_i64; - } else { - unreachable("Unsupported BFE bit size"); + Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op); + bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract); } - bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); - } else { - aco_opcode opcode; - if (dst.regClass() == v1) { - if (instr->op == nir_op_ubfe) - opcode = aco_opcode::v_bfe_u32; - else - opcode = aco_opcode::v_bfe_i32; - } else { - unreachable("Unsupported BFE bit size"); - } - + aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32; emit_vop3a_instruction(ctx, instr, opcode, dst); } break; @@ -5345,7 +5248,7 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) assert(nir_instr_is_last(&instr->instr)); ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(ctx->block->index, linear_target); return; } @@ -5358,14 +5261,14 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx; /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; add_linear_edge(idx, break_block); add_linear_edge(break_block->index, linear_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9149,7 +9052,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform break - directly jump out of the loop */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9171,7 +9074,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform continue - directly jump to the loop header */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9187,7 +9090,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) } /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; @@ -9197,7 +9100,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; add_linear_edge(break_block->index, logical_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9297,7 +9200,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) append_logical_end(ctx->block); ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform; Builder bld(ctx->program, ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); unsigned loop_preheader_idx = ctx->block->index; Block loop_exit = Block(); @@ -9332,7 +9235,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind = block_kind_uniform; bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, break_block); add_linear_edge(break_block->index, &loop_exit); @@ -9340,7 +9243,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; continue_block->kind = block_kind_uniform; bld.reset(continue_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, continue_block); add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]); @@ -9356,7 +9259,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) } bld.reset(ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); } /* Fixup phis in loop header from unreachable blocks. @@ -9434,7 +9337,9 @@ static void begin_divergent_if_then(isel_context *ctx, if_context *ic, Temp cond /* branch to linear then block */ assert(cond.regClass() == ctx->program->lane_mask); aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9473,7 +9378,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then_logical); /* branch from logical then block to invert block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_logical->index, &ic->BB_invert); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9489,7 +9396,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) BB_then_linear->kind |= block_kind_uniform; add_linear_edge(ic->BB_if_idx, BB_then_linear); /* branch from linear then block to invert block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_linear->index, &ic->BB_invert); @@ -9498,7 +9407,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) ic->invert_idx = ctx->block->index; /* branch to linear else block (skip else) */ - branch.reset(create_instruction(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(ic->cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9527,7 +9438,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) /* branch from logical else block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_logical->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9545,7 +9458,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) add_linear_edge(ic->invert_idx, BB_else_linear); /* branch from linear else block to endif block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_linear->index, &ic->BB_endif); @@ -9582,7 +9497,9 @@ static void begin_uniform_if_then(isel_context *ctx, if_context *ic, Temp cond) aco_ptr branch; aco_opcode branch_opcode = aco_opcode::p_cbranch_z; - branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); branch->operands[0].setFixed(scc); ctx->block->instructions.emplace_back(std::move(branch)); @@ -9614,7 +9531,9 @@ static void begin_uniform_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then->index, &ic->BB_endif); if (!ic->then_branch_divergent) @@ -9641,7 +9560,9 @@ static void end_uniform_if(isel_context *ctx, if_context *ic) append_logical_end(BB_else); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -11002,7 +10923,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream)); append_logical_end(ctx.block); ctx.block->kind |= block_kind_uniform; - bld.branch(aco_opcode::p_cbranch_z, cond); + bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond); BB_endif.kind |= ctx.block->kind & block_kind_top_level; @@ -11064,7 +10985,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(ctx.block); /* branch from then block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(ctx.block->index, &BB_endif); ctx.block->kind |= block_kind_uniform; @@ -11086,7 +11007,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(BB_else); /* branch from else block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(BB_else->index, &BB_endif); BB_else->kind |= block_kind_uniform;