aco: change live_out variables to std::unordered_set
author: Daniel Schürmann <daniel@schuermann.dev>
Wed, 11 Mar 2020 10:02:20 +0000 (11:02 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Thu, 9 Apr 2020 15:08:57 +0000 (15:08 +0000)
Improves performance of live_var_analysis for larger shaders

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4130>

src/amd/compiler/aco_ir.h
src/amd/compiler/aco_live_var_analysis.cpp
src/amd/compiler/aco_register_allocation.cpp
src/amd/compiler/aco_spill.cpp

index 2a52d6b44de4c4ba7b5aefc95b0628243008738c..8ff5a9e220a623cefa8a7ce14c32251a7be80737 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <vector>
 #include <set>
+#include <unordered_set>
 #include <bitset>
 #include <memory>
 
@@ -1442,9 +1443,16 @@ private:
    uint32_t allocationID = 1;
 };
 
+struct TempHash {
+   std::size_t operator()(Temp t) const {
+      return t.id();
+   }
+};
+using TempSet = std::unordered_set<Temp, TempHash>;
+
 struct live {
    /* live temps out per block */
-   std::vector<std::set<Temp>> live_out;
+   std::vector<TempSet> live_out;
    /* register demand (sgpr/vgpr) per instruction per block */
    std::vector<std::vector<RegisterDemand>> register_demand;
 };
@@ -1471,7 +1479,7 @@ void value_numbering(Program* program);
 void optimize(Program* program);
 void setup_reduce_temp(Program* program);
 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
-void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out_per_block);
+void register_allocation(Program *program, std::vector<TempSet>& live_out_per_block);
 void ssa_elimination(Program* program);
 void lower_to_hw_instr(Program* program);
 void schedule_program(Program* program, live& live_vars);
index c6995e44e8d3f0e2f233d0a20f3d57a3c67f2f7c..0378dbaf335e87b6ded33892457fe1b0c6e2d7d2 100644 (file)
@@ -91,7 +91,7 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
 
    register_demand.resize(block->instructions.size());
    block->register_demand = RegisterDemand();
-   std::set<Temp> live = lives.live_out[block->index];
+   TempSet live = lives.live_out[block->index];
 
    /* add the live_out_exec to live */
    bool exec_live = false;
index 2332677bd8309ec71b21e5c14a415593a7400fc7..410d8c14d8c815e4d4a2338fd78b7e882514f094 100644 (file)
@@ -1097,7 +1097,7 @@ bool operand_can_use_reg(aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg)
 } /* end namespace */
 
 
-void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out_per_block)
+void register_allocation(Program *program, std::vector<TempSet>& live_out_per_block)
 {
    ra_ctx ctx(program);
 
@@ -1270,7 +1270,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out
       Block& block = *it;
 
       /* first, compute the death points of all live vars within the block */
-      std::set<Temp>& live = live_out_per_block[block.index];
+      TempSet& live = live_out_per_block[block.index];
 
       std::vector<aco_ptr<Instruction>>::reverse_iterator rit;
       for (rit = block.instructions.rbegin(); rit != block.instructions.rend(); ++rit) {
@@ -1342,7 +1342,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>>& live_out
    std::vector<std::bitset<128>> sgpr_live_in(program->blocks.size());
 
    for (Block& block : program->blocks) {
-      std::set<Temp>& live = live_out_per_block[block.index];
+      TempSet& live = live_out_per_block[block.index];
       /* initialize register file */
       assert(block.index != 0 || live.empty());
       RegisterFile register_file;
index 1b8824c61f88a471acaaafa2faa141a6cc5f7828..54b84488a0a4cba02a4f403a7aac98aaf25a4b97 100644 (file)
@@ -28,6 +28,7 @@
 #include "sid.h"
 
 #include <map>
+#include <set>
 #include <stack>
 
 /*
@@ -213,7 +214,7 @@ void next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set<uint32_t>&
 
 }
 
-void compute_global_next_uses(spill_ctx& ctx, std::vector<std::set<Temp>>& live_out)
+void compute_global_next_uses(spill_ctx& ctx)
 {
    ctx.next_use_distances_start.resize(ctx.program->blocks.size());
    ctx.next_use_distances_end.resize(ctx.program->blocks.size());
@@ -1764,7 +1765,7 @@ void spill(Program* program, live& live_vars, const struct radv_nir_compiler_opt
 
    /* initialize ctx */
    spill_ctx ctx(register_target, program, live_vars.register_demand);
-   compute_global_next_uses(ctx, live_vars.live_out);
+   compute_global_next_uses(ctx);
    get_rematerialize_info(ctx);
 
    /* create spills and reloads */