From 70f63c198863e60e844978e1ca2e9773159ca8d3 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 14 Oct 2019 17:21:04 +0100
Subject: [PATCH] aco: improve support for s_sendmsg
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

In particular, the messages needed for GS.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
Review notes (not part of the commit message):

- The e-mail addresses on the From/Signed-off-by/Reviewed-by lines and the
  Part-of URL are missing from this copy of the patch; restore them from the
  original commit.
- aco_builder_h.py: adds the sendmsg message-id enum plus sendmsg_gs() and
  sendmsg_gs_done(), which pack cut (bit 4), emit (bit 5) and the stream
  index (from bit 8, asserted < 4) on top of the message id.
- aco_insert_NOPs.cpp: handle_instruction_gfx8_9() now returns 1 for an
  s_sendmsg whose immediately preceding emitted instruction is an SALU
  instruction with m0 as its first definition. Only s_sendmsg is checked
  here, while aco_insert_waitcnt.cpp treats s_sendmsg and s_sendmsghalt
  alike; confirm whether s_sendmsghalt needs the same check.
- aco_insert_waitcnt.cpp: s_sendmsg/s_sendmsghalt are tracked as a new
  event_sendmsg on the lgkm counter. The new `case Format::SOPP` block has
  no `break` and falls through to `default: break;`. This is harmless as
  written, but an explicit `break` would keep it safe if another case is
  ever inserted between the two.
- aco_print_ir.cpp: the gs/gs_done printers emit `imm >> 8` unmasked, and the
  inner switch has no default, so message ids 1 and 10-15 print nothing.

 src/amd/compiler/aco_builder_h.py       | 27 +++++++++++++++++++
 src/amd/compiler/aco_insert_NOPs.cpp    |  8 ++++++
 src/amd/compiler/aco_insert_waitcnt.cpp | 11 ++++++--
 src/amd/compiler/aco_print_ir.cpp       | 35 +++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index b8854810e47..18e4bf752ec 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -78,6 +78,33 @@ ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
 
 aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
 
+enum sendmsg {
+   sendmsg_none = 0,
+   _sendmsg_gs = 2,
+   _sendmsg_gs_done = 3,
+   sendmsg_save_wave = 4,
+   sendmsg_stall_wave_gen = 5,
+   sendmsg_halt_waves = 6,
+   sendmsg_ordered_ps_done = 7,
+   sendmsg_early_prim_dealloc = 8,
+   sendmsg_gs_alloc_req = 9,
+   sendmsg_id_mask = 0xf,
+};
+
+inline sendmsg
+sendmsg_gs(bool cut, bool emit, unsigned stream)
+{
+   assert(stream < 4);
+   return (sendmsg)((unsigned)_sendmsg_gs | (cut << 4) | (emit << 5) | (stream << 8));
+}
+
+inline sendmsg
+sendmsg_gs_done(bool cut, bool emit, unsigned stream)
+{
+   assert(stream < 4);
+   return (sendmsg)((unsigned)_sendmsg_gs_done | (cut << 4) | (emit << 5) | (stream << 8));
+}
+
 class Builder {
 public:
    struct Result {
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 1ead0c04da5..b9eaaed96db 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -378,6 +378,14 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr, 
             }
          }
       }
+   } else if (instr->format == Format::SOPP) {
+      if (instr->opcode == aco_opcode::s_sendmsg && new_idx > 0) {
+         aco_ptr<Instruction>& pred = new_instructions.back();
+         if (pred->isSALU() &&
+             !pred->definitions.empty() &&
+             pred->definitions[0].physReg() == m0)
+            return 1;
+      }
    }
 
    return 0;
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index a8343d18894..b74d5f57c25 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -65,6 +65,7 @@ enum wait_event : uint16_t {
    event_exp_mrt_null = 1 << 8,
    event_gds_gpr_lock = 1 << 9,
    event_vmem_gpr_lock = 1 << 10,
+   event_sendmsg = 1 << 11,
 };
 
 enum counter_type : uint8_t {
@@ -75,7 +76,7 @@ enum counter_type : uint8_t {
 };
 
 static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock;
-static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat;
+static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
 static const uint16_t vm_events = event_vmem | event_flat;
 static const uint16_t vs_events = event_vmem_store;
 
@@ -85,6 +86,7 @@ uint8_t get_counters_for_event(wait_event ev)
    case event_smem:
    case event_lds:
    case event_gds:
+   case event_sendmsg:
       return counter_lgkm;
    case event_vmem:
       return counter_vm;
@@ -204,7 +206,7 @@ struct wait_entry {
 
       if (counter == counter_lgkm) {
          imm.lgkm = wait_imm::unset_counter;
-         events &= ~(event_smem | event_lds | event_gds);
+         events &= ~(event_smem | event_lds | event_gds | event_sendmsg);
       }
 
       if (counter == counter_vm) {
@@ -685,6 +687,11 @@ void gen(Instruction* instr, wait_ctx& ctx)
       }
       break;
    }
+   case Format::SOPP: {
+      if (instr->opcode == aco_opcode::s_sendmsg ||
+          instr->opcode == aco_opcode::s_sendmsghalt)
+         update_counters(ctx, event_sendmsg, get_barrier_interaction(instr));
+   }
    default:
       break;
    }
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 81711a278c9..c17845c082d 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -192,6 +192,41 @@ static void print_instr_format_specific(struct Instruction *instr, FILE *output)
       case aco_opcode::s_set_gpr_idx_off: {
          break;
       }
+      case aco_opcode::s_sendmsg: {
+         unsigned id = imm & sendmsg_id_mask;
+         switch (id) {
+         case sendmsg_none:
+            fprintf(output, " sendmsg(MSG_NONE)");
+            break;
+         case _sendmsg_gs:
+            fprintf(output, " sendmsg(gs%s%s, %u)",
+                    imm & 0x10 ? ", cut" : "", imm & 0x20 ? ", emit" : "", imm >> 8);
+            break;
+         case _sendmsg_gs_done:
+            fprintf(output, " sendmsg(gs_done%s%s, %u)",
+                    imm & 0x10 ? ", cut" : "", imm & 0x20 ? ", emit" : "", imm >> 8);
+            break;
+         case sendmsg_save_wave:
+            fprintf(output, " sendmsg(save_wave)");
+            break;
+         case sendmsg_stall_wave_gen:
+            fprintf(output, " sendmsg(stall_wave_gen)");
+            break;
+         case sendmsg_halt_waves:
+            fprintf(output, " sendmsg(halt_waves)");
+            break;
+         case sendmsg_ordered_ps_done:
+            fprintf(output, " sendmsg(ordered_ps_done)");
+            break;
+         case sendmsg_early_prim_dealloc:
+            fprintf(output, " sendmsg(early_prim_dealloc)");
+            break;
+         case sendmsg_gs_alloc_req:
+            fprintf(output, " sendmsg(gs_alloc_req)");
+            break;
+         }
+         break;
+      }
       default: {
          if (imm)
             fprintf(output, " imm:%u", imm);
-- 
2.30.2