From 2ae27b96efca6473e9671a22d60f6b9496001413 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 11 Mar 2020 11:02:20 +0100 Subject: [PATCH] aco: change live_out variables to std::unordered_set Improves performance of live_var_analysis for larger shaders Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_ir.h | 12 ++++++++++-- src/amd/compiler/aco_live_var_analysis.cpp | 2 +- src/amd/compiler/aco_register_allocation.cpp | 6 +++--- src/amd/compiler/aco_spill.cpp | 5 +++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 2a52d6b44de..8ff5a9e220a 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -27,6 +27,7 @@ #include #include +#include <unordered_set> #include #include @@ -1442,9 +1443,16 @@ private: uint32_t allocationID = 1; }; +struct TempHash { + std::size_t operator()(Temp t) const { + return t.id(); + } +}; +using TempSet = std::unordered_set<Temp, TempHash>; + struct live { /* live temps out per block */ - std::vector<std::set<Temp>> live_out; + std::vector<TempSet> live_out; /* register demand (sgpr/vgpr) per instruction per block */ std::vector> register_demand; }; @@ -1471,7 +1479,7 @@ void value_numbering(Program* program); void optimize(Program* program); void setup_reduce_temp(Program* program); void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options); -void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out_per_block); +void register_allocation(Program *program, std::vector<TempSet>& live_out_per_block); void ssa_elimination(Program* program); void lower_to_hw_instr(Program* program); void schedule_program(Program* program, live& live_vars); diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index c6995e44e8d..0378dbaf335 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -91,7 +91,7 @@ void process_live_temps_per_block(Program 
*program, live& lives, Block* block, register_demand.resize(block->instructions.size()); block->register_demand = RegisterDemand(); - std::set<Temp> live = lives.live_out[block->index]; + TempSet live = lives.live_out[block->index]; /* add the live_out_exec to live */ bool exec_live = false; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 2332677bd83..410d8c14d8c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1097,7 +1097,7 @@ bool operand_can_use_reg(aco_ptr& instr, unsigned idx, PhysReg reg) } /* end namespace */ -void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out_per_block) +void register_allocation(Program *program, std::vector<TempSet>& live_out_per_block) { ra_ctx ctx(program); @@ -1270,7 +1270,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out Block& block = *it; /* first, compute the death points of all live vars within the block */ - std::set<Temp>& live = live_out_per_block[block.index]; + TempSet& live = live_out_per_block[block.index]; std::vector>::reverse_iterator rit; for (rit = block.instructions.rbegin(); rit != block.instructions.rend(); ++rit) { @@ -1342,7 +1342,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out std::vector> sgpr_live_in(program->blocks.size()); for (Block& block : program->blocks) { - std::set<Temp>& live = live_out_per_block[block.index]; + TempSet& live = live_out_per_block[block.index]; /* initialize register file */ assert(block.index != 0 || live.empty()); RegisterFile register_file; diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 1b8824c61f8..54b84488a0a 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -28,6 +28,7 @@ #include "sid.h" #include +#include #include /* @@ -213,7 +214,7 @@ void next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set& } -void compute_global_next_uses(spill_ctx& 
ctx, std::vector<std::set<Temp>>& live_out) +void compute_global_next_uses(spill_ctx& ctx) { ctx.next_use_distances_start.resize(ctx.program->blocks.size()); ctx.next_use_distances_end.resize(ctx.program->blocks.size()); @@ -1764,7 +1765,7 @@ void spill(Program* program, live& live_vars, const struct radv_nir_compiler_opt /* initialize ctx */ spill_ctx ctx(register_target, program, live_vars.register_demand); - compute_global_next_uses(ctx, live_vars.live_out); + compute_global_next_uses(ctx); get_rematerialize_info(ctx); /* create spills and reloads */ -- 2.30.2