X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_instruction_selection.cpp;h=8ce4afc829cd72c9cc1c47be959df16bafff7571;hp=5bd389a5be415be281b28b922f7c4c89a73de7c7;hb=156fd58cdacb28a7fca88fc9ffc84c3cdfbbb8f3;hpb=9c1e0d86a813af7609acf42cfe6bec7401d6405f diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5bd389a5be4..8ce4afc829c 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1793,84 +1793,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } break; } - case nir_op_fmax3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_fmin3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_fmed3: { - if (dst.regClass() == v2b) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f16, dst, false); - } else if (dst.regClass() == v1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f32, dst, ctx->block->fp_mode.must_flush_denorms32); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umax3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umin3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_umed3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_u32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imax3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imin3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } - case nir_op_imed3: { - if (dst.size() == 1) { - emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_i32, dst); - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } case nir_op_cube_face_coord: { Temp in = get_alu_src(ctx, instr->src[0], 3); Temp src[3] = { emit_extract_vector(ctx, in, 0, v1), @@ -2918,7 +2840,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32, aco_opcode::v_cmp_eq_f64); break; } - case nir_op_fne: { + case nir_op_fneu: { emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32, aco_opcode::v_cmp_neq_f64); break; } @@ -5345,7 +5267,7 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) assert(nir_instr_is_last(&instr->instr)); ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(ctx->block->index, linear_target); return; } @@ -5358,14 +5280,14 @@ void visit_discard(isel_context* ctx, nir_intrinsic_instr *instr) ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx; /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; add_linear_edge(idx, break_block); add_linear_edge(break_block->index, linear_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9149,7 +9071,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform break - directly jump out of the loop */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9171,7 +9093,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) /* uniform continue - directly jump to the loop header */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(idx, logical_target); return; } @@ -9187,7 +9109,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) } /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* break_block = ctx->program->create_and_insert_block(); break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind |= block_kind_uniform; @@ -9197,7 +9119,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr) logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; add_linear_edge(break_block->index, logical_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; @@ -9297,7 +9219,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) append_logical_end(ctx->block); ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform; Builder bld(ctx->program, ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); unsigned loop_preheader_idx = ctx->block->index; Block loop_exit = Block(); @@ -9332,7 +9254,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; break_block->kind = block_kind_uniform; bld.reset(break_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, break_block); add_linear_edge(break_block->index, &loop_exit); @@ -9340,7 +9262,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth; continue_block->kind = block_kind_uniform; bld.reset(continue_block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_linear_edge(block_idx, continue_block); add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]); @@ -9356,7 +9278,7 @@ static void visit_loop(isel_context *ctx, nir_loop *loop) } bld.reset(ctx->block); - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); } /* Fixup phis in loop header from unreachable blocks. @@ -9434,7 +9356,9 @@ static void begin_divergent_if_then(isel_context *ctx, if_context *ic, Temp cond /* branch to linear then block */ assert(cond.regClass() == ctx->program->lane_mask); aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9473,7 +9397,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then_logical); /* branch from logical then block to invert block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_logical->index, &ic->BB_invert); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9489,7 +9415,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) BB_then_linear->kind |= block_kind_uniform; add_linear_edge(ic->BB_if_idx, BB_then_linear); /* branch from linear then block to invert block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_linear->index, &ic->BB_invert); @@ -9498,7 +9426,9 @@ static void begin_divergent_if_else(isel_context *ctx, if_context *ic) ic->invert_idx = ctx->block->index; /* branch to linear else block (skip else) */ - branch.reset(create_instruction(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(ic->cond); ctx->block->instructions.push_back(std::move(branch)); @@ -9527,7 +9457,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) /* branch from logical else block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_logical->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -9545,7 +9477,9 @@ static void end_divergent_if(isel_context *ctx, if_context *ic) add_linear_edge(ic->invert_idx, BB_else_linear); /* branch from linear else block to endif block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_linear->index, &ic->BB_endif); @@ -9582,7 +9516,9 @@ static void begin_uniform_if_then(isel_context *ctx, if_context *ic, Temp cond) aco_ptr branch; aco_opcode branch_opcode = aco_opcode::p_cbranch_z; - branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 0)); + branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); branch->operands[0] = Operand(cond); branch->operands[0].setFixed(scc); ctx->block->instructions.emplace_back(std::move(branch)); @@ -9614,7 +9550,9 @@ static void begin_uniform_if_else(isel_context *ctx, if_context *ic) append_logical_end(BB_then); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_then->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then->index, &ic->BB_endif); if (!ic->then_branch_divergent) @@ -9641,7 +9579,9 @@ static void end_uniform_if(isel_context *ctx, if_context *ic) append_logical_end(BB_else); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); + branch->definitions[0] = {ctx->program->allocateId(), s2}; + branch->definitions[0].setHint(vcc); BB_else->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -11002,7 +10942,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream)); append_logical_end(ctx.block); ctx.block->kind |= block_kind_uniform; - bld.branch(aco_opcode::p_cbranch_z, cond); + bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond); BB_endif.kind |= ctx.block->kind & block_kind_top_level; @@ -11064,7 +11004,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(ctx.block); /* branch from then block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(ctx.block->index, &BB_endif); ctx.block->kind |= block_kind_uniform; @@ -11086,7 +11026,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, append_logical_end(BB_else); /* branch from else block to endif block */ - bld.branch(aco_opcode::p_branch); + bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2))); add_edge(BB_else->index, &BB_endif); BB_else->kind |= block_kind_uniform; @@ -11104,4 +11044,64 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, cleanup_cfg(program); } + +void select_trap_handler_shader(Program *program, struct nir_shader *shader, + ac_shader_config* config, + struct radv_shader_args *args) +{ + assert(args->options->chip_class == GFX8); + + init_program(program, compute_cs, args->shader_info, + args->options->chip_class, args->options->family, config); + + isel_context ctx = {}; + ctx.program = program; + ctx.args = args; + ctx.options = args->options; + ctx.stage = program->stage; + + ctx.block = ctx.program->create_and_insert_block(); + ctx.block->loop_nest_depth = 0; + ctx.block->kind = block_kind_top_level; + + program->workgroup_size = 1; /* XXX */ + + add_startpgm(&ctx); + append_logical_start(ctx.block); + + Builder bld(ctx.program, ctx.block); + + /* Load the buffer descriptor from TMA. */ + bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4), + Operand(PhysReg{tma}, s2), Operand(0u)); + + /* Store TTMP0-TTMP1. */ + bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), + Operand(0u), Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true); + + uint32_t hw_regs_idx[] = { + 2, /* HW_REG_STATUS */ + 3, /* HW_REG_TRAP_STS */ + 4, /* HW_REG_HW_ID */ + 7, /* HW_REG_IB_STS */ + }; + + /* Store some hardware registers. */ + for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) { + /* "((size - 1) << 11) | register" */ + bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1), + ((20 - 1) << 11) | hw_regs_idx[i]); + + bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4), + Operand(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true); + } + + program->config->float_mode = program->blocks[0].fp_mode.val; + + append_logical_end(ctx.block); + ctx.block->kind |= block_kind_uniform; + bld.sopp(aco_opcode::s_endpgm); + + cleanup_cfg(program); +} }