From 393655d5cb2ae499783408d36a96e34257473fcf Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Fri, 27 Dec 2019 17:49:26 +0100 Subject: [PATCH] r600/sfn: add live range evaluation for the GPR The algoritm is basically a copy of the TGSI implementation without the array bits. Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/Makefile.sources | 2 + src/gallium/drivers/r600/meson.build | 2 + .../drivers/r600/sfn/sfn_instruction_base.cpp | 101 +- .../drivers/r600/sfn/sfn_instruction_base.h | 7 + .../drivers/r600/sfn/sfn_instruction_cf.cpp | 31 + .../drivers/r600/sfn/sfn_instruction_cf.h | 6 + .../r600/sfn/sfn_instruction_export.cpp | 1 + .../drivers/r600/sfn/sfn_liverange.cpp | 987 ++++++++++++++++++ src/gallium/drivers/r600/sfn/sfn_liverange.h | 314 ++++++ src/gallium/drivers/r600/sfn/sfn_nir.cpp | 4 + .../drivers/r600/sfn/sfn_shader_base.cpp | 75 ++ .../drivers/r600/sfn/sfn_value_gpr.cpp | 35 + src/gallium/drivers/r600/sfn/sfn_value_gpr.h | 7 + 13 files changed, 1571 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.cpp create mode 100644 src/gallium/drivers/r600/sfn/sfn_liverange.h diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 8032e984229..f16bce16113 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -120,6 +120,8 @@ CXX_SOURCES = \ sfn/sfn_instruction_tex.h \ sfn/sfn_ir_to_assembly.cpp \ sfn/sfn_ir_to_assembly.h \ + sfn/sfn_liverange.cpp \ + sfn/sfn_liverange.h \ sfn/sfn_nir.cpp \ sfn/sfn_nir.h \ sfn/sfn_nir_lower_fs_out_to_vector.cpp \ diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index 6df33788691..bfc17dae153 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -137,6 +137,8 @@ files_r600 = files( 'sfn/sfn_instruction_tex.h', 'sfn/sfn_ir_to_assembly.cpp', 'sfn/sfn_ir_to_assembly.h', + 'sfn/sfn_liverange.cpp', + 'sfn/sfn_liverange.h', 'sfn/sfn_nir.cpp', 'sfn/sfn_nir.h', 'sfn/sfn_nir_lower_fs_out_to_vector.cpp', diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp index 6930747550f..4e4e28f1436 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp @@ -29,11 +29,62 @@ #include #include "sfn_instruction_base.h" +#include "sfn_liverange.h" #include "sfn_valuepool.h" -#include "sfn_debug.h" namespace r600 { +ValueRemapper::ValueRemapper(std::vector& m, + ValueMap& values): + m_map(m), + m_values(values) +{ +} + +void ValueRemapper::remap(PValue& v) +{ + if (!v) + return; + if (v->type() == Value::gpr) { + v = remap_one_registers(v); + } else if (v->type() == Value::gpr_array_value) { + GPRArrayValue& val = static_cast(*v); + auto value = val.value(); + auto addr = val.indirect(); + val.reset_value(remap_one_registers(value)); + if (addr) { + if (addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); + } + size_t range_start = val.sel(); + size_t range_end = range_start + val.array_size(); + while (range_start < range_end) + m_map[range_start++].used = true; + } +} + +void ValueRemapper::remap(GPRVector& v) +{ + for (int i = 0; i < 4; ++i) { + if (v.reg_i(i)) { + auto& ns_idx = m_map[v.reg_i(i)->sel()]; + if (ns_idx.valid) + v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan())); + m_map[v.reg_i(i)->sel()].used = true; + } + } +} + +PValue ValueRemapper::remap_one_registers(PValue& reg) +{ + auto new_index = m_map[reg->sel()]; + if (new_index.valid) + reg = m_values.get_or_inject(new_index.new_reg, reg->chan()); + m_map[reg->sel()].used = true; + return reg; +} + + Instruction::Instruction(instr_type t): m_type(t) { @@ -49,6 +100,54 @@ void Instruction::print(std::ostream& os) const do_print(os); } + +void Instruction::remap_registers(ValueRemapper& map) +{ + sfn_log << SfnLog::merge << "REMAP " << *this << "\n"; + for (auto& v: m_mappable_src_registers) + map.remap(*v); + + for (auto& v: m_mappable_src_vectors) + map.remap(*v); + + for (auto& v: m_mappable_dst_registers) + map.remap(*v); + + for (auto& v: m_mappable_dst_vectors) + map.remap(*v); + sfn_log << SfnLog::merge << "TO " << *this << "\n\n"; +} + +void Instruction::replace_values(UNUSED const ValueSet& candiates, UNUSED PValue new_value) +{ + +} + +void Instruction::evalue_liveness(LiverangeEvaluator& eval) const +{ + sfn_log << SfnLog::merge << "Scan " << *this << "\n"; + for (const auto& s: m_mappable_src_registers) + if (*s) + eval.record_read(**s); + + for (const auto& s: m_mappable_src_vectors) + eval.record_read(*s); + + for (const auto& s: m_mappable_dst_registers) + if (*s) + eval.record_write(**s); + + for (const auto& s: m_mappable_dst_vectors) + eval.record_write(*s); + + do_evalue_liveness(eval); +} + +void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const +{ + +} + bool operator == (const Instruction& lhs, const Instruction& rhs) { if (rhs.m_type != lhs.m_type) diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h index fe481fbabfb..e44154ebe91 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_base.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_base.h @@ -39,6 +39,8 @@ namespace r600 { + +class LiverangeEvaluator; using OutputRegisterMap = std::map; class Instruction { @@ -78,7 +80,12 @@ public: void print(std::ostream& os) const; + void evalue_liveness(LiverangeEvaluator& eval) const; + private: + + virtual void do_evalue_liveness(LiverangeEvaluator& eval) const; + virtual bool is_equal_to(const Instruction& lhs) const = 0; instr_type m_type; diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp index 7dde127cd20..82b0f47403a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp @@ -25,6 +25,7 @@ */ #include "sfn_instruction_cf.h" +#include "sfn_liverange.h" namespace r600 { @@ -46,6 +47,11 @@ IfInstruction::IfInstruction(AluInstruction *pred): PValue *v = m_pred->psrc(0); } +void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_if(); +} + bool IfInstruction::is_equal_to(const Instruction& lhs) const { assert(lhs.type() == cond_if); @@ -65,6 +71,11 @@ ElseInstruction::ElseInstruction(IfInstruction *jump_src): { } +void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_else(); +} + bool ElseInstruction::is_equal_to(const Instruction& lhs) const { @@ -84,6 +95,11 @@ IfElseEndInstruction::IfElseEndInstruction(): { } +void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_endif(); +} + bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const { if (lhs.type() != cond_endif) @@ -101,6 +117,11 @@ LoopBeginInstruction::LoopBeginInstruction(): { } +void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_begin(); +} + bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const { assert(lhs.type() == loop_begin); @@ -118,6 +139,11 @@ LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start): { } +void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_end(); +} + bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const { assert(lhs.type() == loop_end); @@ -135,6 +161,11 @@ LoopBreakInstruction::LoopBreakInstruction(): { } +void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_break(); +} + bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const { return true; diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h index abf84f0eb18..10da90f7cbd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h @@ -46,6 +46,7 @@ public: IfInstruction(AluInstruction *pred); const AluInstruction& pred() const {return *m_pred;} private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; std::shared_ptr m_pred; @@ -55,6 +56,7 @@ class ElseInstruction : public IfElseInstruction { public: ElseInstruction(IfInstruction *jump_src); private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; @@ -65,6 +67,7 @@ class IfElseEndInstruction : public IfElseInstruction { public: IfElseEndInstruction(); private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; }; @@ -73,6 +76,7 @@ class LoopBeginInstruction: public CFInstruction { public: LoopBeginInstruction(); private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; }; @@ -81,6 +85,7 @@ class LoopEndInstruction: public CFInstruction { public: LoopEndInstruction(LoopBeginInstruction *start); private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; LoopBeginInstruction *m_start; @@ -90,6 +95,7 @@ class LoopBreakInstruction: public CFInstruction { public: LoopBreakInstruction(); private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; bool is_equal_to(const Instruction& lhs) const override; void do_print(std::ostream& os) const override; }; diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp index 7efa0832da1..14ae52f325e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp @@ -26,6 +26,7 @@ #include "sfn_instruction_export.h" +#include "sfn_liverange.h" #include "sfn_valuepool.h" namespace r600 { diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp new file mode 100644 index 00000000000..1c95c3bc4d1 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverange.cpp @@ -0,0 +1,987 @@ +/* + * Copyright (c) 2017-2019 Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_liverange.h" +#include "sfn_debug.h" +#include "sfn_value.h" +#include "sfn_value_gpr.h" + +#include "program/prog_instruction.h" +#include "util/bitscan.h" +#include "util/u_math.h" + +#include +#include +#include + +/* std::sort is significantly faster than qsort */ +#include + +/* If is included this is defined and clashes with + * std::numeric_limits<>::max() + */ +#ifdef max +#undef max +#endif + + +namespace r600 { + +using std::numeric_limits; +using std::unique_ptr; +using std::setw; + +prog_scope_storage::prog_scope_storage(int n): + current_slot(0), + storage(n) +{ +} + +prog_scope_storage::~prog_scope_storage() +{ +} + +prog_scope* +prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin) +{ + storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); + return &storage[current_slot++]; +} + +prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int scope_begin): + scope_type(type), + scope_id(id), + scope_nesting_depth(depth), + scope_begin(scope_begin), + scope_end(-1), + break_loop_line(numeric_limits::max()), + parent_scope(parent) +{ +} + +prog_scope::prog_scope(): + prog_scope(nullptr, undefined_scope, -1, -1, -1) +{ +} + +prog_scope_type prog_scope::type() const +{ + return scope_type; +} + +prog_scope *prog_scope::parent() const +{ + return parent_scope; +} + +int prog_scope::nesting_depth() const +{ + return scope_nesting_depth; +} + +bool prog_scope::is_loop() const +{ + return (scope_type == loop_body); +} + +bool prog_scope::is_in_loop() const +{ + if (scope_type == loop_body) + return true; + + if (parent_scope) + return parent_scope->is_in_loop(); + + return false; +} + +const prog_scope *prog_scope::innermost_loop() const +{ + if (scope_type == loop_body) + return this; + + if (parent_scope) + return parent_scope->innermost_loop(); + + return nullptr; +} + +const prog_scope *prog_scope::outermost_loop() const +{ + const prog_scope *loop = nullptr; + const prog_scope *p = this; + + do { + if (p->type() == loop_body) + loop = p; + p = p->parent(); + } while (p); + + return loop; +} + +bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const +{ + const prog_scope *my_parent = in_parent_ifelse_scope(); + while (my_parent) { + /* is a direct child? */ + if (my_parent == scope) + return false; + /* is a child of the conditions sibling? */ + if (my_parent->id() == scope->id()) + return true; + my_parent = my_parent->in_parent_ifelse_scope(); + } + return false; +} + +bool prog_scope::is_child_of(const prog_scope *scope) const +{ + const prog_scope *my_parent = parent(); + while (my_parent) { + if (my_parent == scope) + return true; + my_parent = my_parent->parent(); + } + return false; +} + +const prog_scope *prog_scope::enclosing_conditional() const +{ + if (is_conditional()) + return this; + + if (parent_scope) + return parent_scope->enclosing_conditional(); + + return nullptr; +} + +bool prog_scope::contains_range_of(const prog_scope& other) const +{ + return (begin() <= other.begin()) && (end() >= other.end()); +} + +bool prog_scope::is_conditional() const +{ + return scope_type == if_branch || + scope_type == else_branch || + scope_type == switch_case_branch || + scope_type == switch_default_branch; +} + +const prog_scope *prog_scope::in_else_scope() const +{ + if (scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_else_scope(); + + return nullptr; +} + +const prog_scope *prog_scope::in_parent_ifelse_scope() const +{ + if (parent_scope) + return parent_scope->in_ifelse_scope(); + else + return nullptr; +} + +const prog_scope *prog_scope::in_ifelse_scope() const +{ + if (scope_type == if_branch || + scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_ifelse_scope(); + + return nullptr; +} + +bool prog_scope::is_switchcase_scope_in_loop() const +{ + return (scope_type == switch_case_branch || + scope_type == switch_default_branch) && + is_in_loop(); +} + +bool prog_scope::break_is_for_switchcase() const +{ + if (scope_type == loop_body) + return false; + + if (scope_type == switch_case_branch || + scope_type == switch_default_branch || + scope_type == switch_body) + return true; + + if (parent_scope) + return parent_scope->break_is_for_switchcase(); + + return false; +} + +int prog_scope::id() const +{ + return scope_id; +} + +int prog_scope::begin() const +{ + return scope_begin; +} + +int prog_scope::end() const +{ + return scope_end; +} + +void prog_scope::set_end(int end) +{ + if (scope_end == -1) + scope_end = end; +} + +void prog_scope::set_loop_break_line(int line) +{ + if (scope_type == loop_body) { + break_loop_line = MIN2(break_loop_line, line); + } else { + if (parent_scope) + parent()->set_loop_break_line(line); + } +} + +int prog_scope::loop_break_line() const +{ + return break_loop_line; +} + +temp_access::temp_access(): + access_mask(0), + needs_component_tracking(false), + is_array_element(false) +{ +} + +void temp_access::update_access_mask(int mask) +{ + if (access_mask && access_mask != mask) + needs_component_tracking = true; + access_mask |= mask; +} + +void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm) +{ + + + update_access_mask(writemask); + is_array_element |= is_array_elm; + + if (writemask & WRITEMASK_X) + comp[0].record_write(line, scope); + if (writemask & WRITEMASK_Y) + comp[1].record_write(line, scope); + if (writemask & WRITEMASK_Z) + comp[2].record_write(line, scope); + if (writemask & WRITEMASK_W) + comp[3].record_write(line, scope); +} + +void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm) +{ + update_access_mask(readmask); + is_array_element |= is_array_elm; + + if (readmask & WRITEMASK_X) + comp[0].record_read(line, scope); + if (readmask & WRITEMASK_Y) + comp[1].record_read(line, scope); + if (readmask & WRITEMASK_Z) + comp[2].record_read(line, scope); + if (readmask & WRITEMASK_W) + comp[3].record_read(line, scope); +} + +inline static register_live_range make_live_range(int b, int e) +{ + register_live_range lt; + lt.begin = b; + lt.end = e; + lt.is_array_elm = false; + return lt; +} + +register_live_range temp_access::get_required_live_range() +{ + register_live_range result = make_live_range(-1, -1); + + unsigned mask = access_mask; + while (mask) { + unsigned chan = u_bit_scan(&mask); + register_live_range lt = comp[chan].get_required_live_range(); + + if (lt.begin >= 0) { + if ((result.begin < 0) || (result.begin > lt.begin)) + result.begin = lt.begin; + } + + if (lt.end > result.end) + result.end = lt.end; + + if (!needs_component_tracking) + break; + } + result.is_array_elm = is_array_element; + return result; +} + +const int +temp_comp_access::conditionality_untouched = std::numeric_limits::max(); + +const int +temp_comp_access::write_is_unconditional = std::numeric_limits::max() - 1; + + +temp_comp_access::temp_comp_access(): + last_read_scope(nullptr), + first_read_scope(nullptr), + first_write_scope(nullptr), + first_write(-1), + last_read(-1), + last_write(-1), + first_read(numeric_limits::max()), + conditionality_in_loop_id(conditionality_untouched), + if_scope_write_flags(0), + next_ifelse_nesting_depth(0), + current_unpaired_if_write_scope(nullptr), + was_written_in_current_else_scope(false) +{ +} + +void temp_comp_access::record_read(int line, prog_scope *scope) +{ + last_read_scope = scope; + last_read = line; + + if (first_read > line) { + first_read = line; + first_read_scope = scope; + } + + /* If the conditionality of the first write is already resolved then + * no further checks are required. + */ + if (conditionality_in_loop_id == write_is_unconditional || + conditionality_in_loop_id == write_is_conditional) + return; + + /* Check whether we are in a condition within a loop */ + const prog_scope *ifelse_scope = scope->in_ifelse_scope(); + const prog_scope *enclosing_loop; + if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) { + + /* If we have either not yet written to this register nor writes are + * resolved as unconditional in the enclosing loop then check whether + * we read before write in an IF/ELSE branch. + */ + if ((conditionality_in_loop_id != write_is_conditional) && + (conditionality_in_loop_id != enclosing_loop->id())) { + + if (current_unpaired_if_write_scope) { + + /* Has been written in this or a parent scope? - this makes the temporary + * unconditionally set at this point. + */ + if (scope->is_child_of(current_unpaired_if_write_scope)) + return; + + /* Has been written in the same scope before it was read? */ + if (ifelse_scope->type() == if_branch) { + if (current_unpaired_if_write_scope->id() == scope->id()) + return; + } else { + if (was_written_in_current_else_scope) + return; + } + } + + /* The temporary was read (conditionally) before it is written, hence + * it should survive a loop. This can be signaled like if it were + * conditionally written. + */ + conditionality_in_loop_id = write_is_conditional; + } + } +} + +void temp_comp_access::record_write(int line, prog_scope *scope) +{ + last_write = line; + + if (first_write < 0) { + first_write = line; + first_write_scope = scope; + + /* If the first write we encounter is not in a conditional branch, or + * the conditional write is not within a loop, then this is to be + * considered an unconditional dominant write. + */ + const prog_scope *conditional = scope->enclosing_conditional(); + if (!conditional || !conditional->innermost_loop()) { + conditionality_in_loop_id = write_is_unconditional; + } + } + + /* The conditionality of the first write is already resolved. */ + if (conditionality_in_loop_id == write_is_unconditional || + conditionality_in_loop_id == write_is_conditional) + return; + + /* If the nesting depth is larger than the supported level, + * then we assume conditional writes. + */ + if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) { + conditionality_in_loop_id = write_is_conditional; + return; + } + + /* If we are in an IF/ELSE scope within a loop and the loop has not + * been resolved already, then record this write. + */ + const prog_scope *ifelse_scope = scope->in_ifelse_scope(); + if (ifelse_scope && ifelse_scope->innermost_loop() && + ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id) + record_ifelse_write(*ifelse_scope); +} + +void temp_comp_access::record_ifelse_write(const prog_scope& scope) +{ + if (scope.type() == if_branch) { + /* The first write in an IF branch within a loop implies unresolved + * conditionality (if it was untouched or unconditional before). + */ + conditionality_in_loop_id = conditionality_unresolved; + was_written_in_current_else_scope = false; + record_if_write(scope); + } else { + was_written_in_current_else_scope = true; + record_else_write(scope); + } +} + +void temp_comp_access::record_if_write(const prog_scope& scope) +{ + /* Don't record write if this IF scope if it ... + * - is not the first write in this IF scope, + * - has already been written in a parent IF scope. + * In both cases this write is a secondary write that doesn't contribute + * to resolve conditionality. + * + * Record the write if it + * - is the first one (obviously), + * - happens in an IF branch that is a child of the ELSE branch of the + * last active IF/ELSE pair. In this case recording this write is used to + * established whether the write is (un-)conditional in the scope enclosing + * this outer IF/ELSE pair. + */ + if (!current_unpaired_if_write_scope || + (current_unpaired_if_write_scope->id() != scope.id() && + scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) { + if_scope_write_flags |= 1 << next_ifelse_nesting_depth; + current_unpaired_if_write_scope = &scope; + next_ifelse_nesting_depth++; + } +} + +void temp_comp_access::record_else_write(const prog_scope& scope) +{ + int mask = 1 << (next_ifelse_nesting_depth - 1); + + /* If the temporary was written in an IF branch on the same scope level + * and this branch is the sibling of this ELSE branch, then we have a + * pair of writes that makes write access to this temporary unconditional + * in the enclosing scope. + */ + + if ((if_scope_write_flags & mask) && + (scope.id() == current_unpaired_if_write_scope->id())) { + --next_ifelse_nesting_depth; + if_scope_write_flags &= ~mask; + + /* The following code deals with propagating unconditionality from + * inner levels of nested IF/ELSE to the outer levels like in + * + * 1: var t; + * 2: if (a) { <- start scope A + * 3: if (b) + * 4: t = ... + * 5: else + * 6: t = ... + * 7: } else { <- start scope B + * 8: if (c) + * 9: t = ... + * A: else <- start scope C + * B: t = ... + * C: } + * + */ + + const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope(); + + if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { + /* We are at the end of scope C and already recorded a write + * within an IF scope (A), the sibling of the parent ELSE scope B, + * and it is not yet resolved. Mark that as the last relevant + * IF scope. Below the write will be resolved for the A/B + * scope pair. + */ + current_unpaired_if_write_scope = parent_ifelse; + } else { + current_unpaired_if_write_scope = nullptr; + } + /* Promote the first write scope to the enclosing scope because + * the current IF/ELSE pair is now irrelevant for the analysis. + * This is also required to evaluate the minimum life time for t in + * { + * var t; + * if (a) + * t = ... + * else + * t = ... + * x = t; + * ... + * } + */ + first_write_scope = scope.parent(); + + /* If some parent is IF/ELSE and in a loop then propagate the + * write to that scope. Otherwise the write is unconditional + * because it happens in both corresponding IF/ELSE branches + * in this loop, and hence, record the loop id to signal the + * resolution. + */ + if (parent_ifelse && parent_ifelse->is_in_loop()) { + record_ifelse_write(*parent_ifelse); + } else { + conditionality_in_loop_id = scope.innermost_loop()->id(); + } + } else { + /* The temporary was not written in the IF branch corresponding + * to this ELSE branch, hence the write is conditional. + */ + conditionality_in_loop_id = write_is_conditional; + } +} + +bool temp_comp_access::conditional_ifelse_write_in_loop() const +{ + return conditionality_in_loop_id <= conditionality_unresolved; +} + +void temp_comp_access::propagate_live_range_to_dominant_write_scope() +{ + first_write = first_write_scope->begin(); + int lr = first_write_scope->end(); + + if (last_read < lr) + last_read = lr; +} + +register_live_range temp_comp_access::get_required_live_range() +{ + bool keep_for_full_loop = false; + + /* This register component is not used at all, or only read, + * mark it as unused and ignore it when renaming. + * glsl_to_tgsi_visitor::renumber_registers will take care of + * eliminating registers that are not written to. + */ + if (last_write < 0) + return make_live_range(-1, -1); + + assert(first_write_scope); + + /* Only written to, just make sure the register component is not + * reused in the range it is used to write to + */ + if (!last_read_scope) + return make_live_range(first_write, last_write + 1); + + const prog_scope *enclosing_scope_first_read = first_read_scope; + const prog_scope *enclosing_scope_first_write = first_write_scope; + + /* We read before writing in a loop + * hence the value must survive the loops + */ + if ((first_read <= first_write) && + first_read_scope->is_in_loop()) { + keep_for_full_loop = true; + enclosing_scope_first_read = first_read_scope->outermost_loop(); + } + + /* A conditional write within a (nested) loop must survive the outermost + * loop if the last read was not within the same scope. + */ + const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); + if (conditional && !conditional->contains_range_of(*last_read_scope) && + (conditional->is_switchcase_scope_in_loop() || + conditional_ifelse_write_in_loop())) { + keep_for_full_loop = true; + enclosing_scope_first_write = conditional->outermost_loop(); + } + + /* Evaluate the scope that is shared by all: required first write scope, + * required first read before write scope, and last read scope. + */ + const prog_scope *enclosing_scope = enclosing_scope_first_read; + if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) + enclosing_scope = enclosing_scope_first_write; + + if (last_read_scope->contains_range_of(*enclosing_scope)) + enclosing_scope = last_read_scope; + + while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || + !enclosing_scope->contains_range_of(*last_read_scope)) { + enclosing_scope = enclosing_scope->parent(); + assert(enclosing_scope); + } + + /* Propagate the last read scope to the target scope */ + while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { + /* If the read is in a loop and we have to move up the scope we need to + * extend the live range to the end of this current loop because at this + * point we don't know whether the component was written before + * un-conditionally in the same loop. + */ + if (last_read_scope->is_loop()) + last_read = last_read_scope->end(); + + last_read_scope = last_read_scope->parent(); + } + + /* If the variable has to be kept for the whole loop, and we + * are currently in a loop, then propagate the live range. + */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + + /* Propagate the first_dominant_write scope to the target scope */ + while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { + /* Propagate live_range if there was a break in a loop and the write was + * after the break inside that loop. Note, that this is only needed if + * we move up in the scopes. + */ + if (first_write_scope->loop_break_line() < first_write) { + keep_for_full_loop = true; + propagate_live_range_to_dominant_write_scope(); + } + + first_write_scope = first_write_scope->parent(); + + /* Propagte live_range if we are now in a loop */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + } + + /* The last write past the last read is dead code, but we have to + * ensure that the component is not reused too early, hence extend the + * live_range past the last write. + */ + if (last_write >= last_read) + last_read = last_write + 1; + + /* Here we are at the same scope, all is resolved */ + return make_live_range(first_write, last_read); +} + +/* Helper class for sorting and searching the registers based + * on live ranges. */ +class register_merge_record { +public: + int begin; + int end; + int reg; + bool erase; + bool is_array_elm; + + bool operator < (const register_merge_record& rhs) const { + return begin < rhs.begin; + } +}; + +LiverangeEvaluator::LiverangeEvaluator(): + line(0), + loop_id(1), + if_id(1), + switch_id(0), + is_at_end(false), + n_scopes(1), + cur_scope(nullptr) +{ +} + +void LiverangeEvaluator::run(const Shader& shader, + std::vector& register_live_ranges) +{ + temp_acc.resize(register_live_ranges.size()); + fill(temp_acc.begin(), temp_acc.end(), temp_access()); + + sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n"; + + for (const auto& ir: shader.m_ir) { + switch (ir->type()) { + case Instruction::cond_if: + case Instruction::cond_else: + case Instruction::loop_begin: + ++n_scopes; + default: + ; + } + } + + scopes.reset(new prog_scope_storage(n_scopes)); + + cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line); + + line = 0; + + for (auto& v: shader.m_temp) { + if (v.second->type() == Value::gpr) { + const auto& g = static_cast(*v.second); + if (g.is_input()) { + sfn_log << SfnLog::merge << "Record INPUT write for " + << g << " in " << temp_acc.size() << " temps\n"; + temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false); + temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false); + } + } + } + + for (const auto& ir: shader.m_ir) { + ir->evalue_liveness(*this); + if (ir->type() != Instruction::alu || + static_cast(*ir).flag(alu_last_instr)) + ++line; + } + + assert(cur_scope->type() == outer_scope); + cur_scope->set_end(line); + is_at_end = true; + + get_required_live_ranges(register_live_ranges); +} + + +void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm) +{ + sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n"; + if (src.type() == Value::gpr) { + const GPRValue& v = static_cast(src); + if (v.chan() < 4) + temp_acc[v.sel()].record_read(line, cur_scope, 1 << v.chan(), is_array_elm); + return; + } else if (src.type() == Value::gpr_array_value) { + const GPRArrayValue& v = static_cast(src); + v.record_read(*this); + } +} + +void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm) +{ + sfn_log << SfnLog::merge << "Record write for " + << src << " in " << temp_acc.size() << " temps\n"; + + if (src.type() == Value::gpr) { + const GPRValue& v = static_cast(src); + assert(v.sel() < temp_acc.size()); + if (v.chan() < 4) + temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm); + return; + } else if (src.type() == Value::gpr_array_value) { + const GPRArrayValue& v = static_cast(src); + v.record_write(*this); + } +} + +void LiverangeEvaluator::record_read(const GPRVector& src) +{ + for (int i = 0; i < 4; ++i) + if (src.reg_i(i)) + record_read(*src.reg_i(i)); +} + +void LiverangeEvaluator::record_write(const GPRVector& dst) +{ + for (int i = 0; i < 4; ++i) + if (dst.reg_i(i)) + record_write(*dst.reg_i(i)); +} + +void LiverangeEvaluator::get_required_live_ranges(std::vector& register_live_ranges) +{ + sfn_log << SfnLog::merge << "== register live ranges ==========\n"; + for(unsigned i = 0; i < register_live_ranges.size(); ++i) { + sfn_log << SfnLog::merge << setw(4) << i; + register_live_ranges[i] = temp_acc[i].get_required_live_range(); + sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", " + << register_live_ranges[i].end << "]\n"; + } + sfn_log << SfnLog::merge << "==================================\n\n"; +} + +void LiverangeEvaluator::scope_if() +{ + cur_scope = scopes->create(cur_scope, if_branch, if_id++, + cur_scope->nesting_depth() + 1, line + 1); +} + +void LiverangeEvaluator::scope_else() +{ + assert(cur_scope->type() == if_branch); + cur_scope->set_end(line - 1); + cur_scope = scopes->create(cur_scope->parent(), else_branch, + cur_scope->id(), cur_scope->nesting_depth(), + line + 1); +} + +void LiverangeEvaluator::scope_endif() +{ + cur_scope->set_end(line - 1); + cur_scope = cur_scope->parent(); + assert(cur_scope); +} + +void LiverangeEvaluator::scope_loop_begin() +{ + cur_scope = scopes->create(cur_scope, loop_body, loop_id++, + cur_scope->nesting_depth() + 1, line); +} + +void LiverangeEvaluator::scope_loop_end() +{ + assert(cur_scope->type() == loop_body); + cur_scope->set_end(line); + cur_scope = cur_scope->parent(); + assert(cur_scope); +} + +void LiverangeEvaluator::scope_loop_break() +{ + cur_scope->set_loop_break_line(line); +} + +/* This functions evaluates the register merges by using a binary + * search to find suitable merge candidates. */ + +std::vector +get_temp_registers_remapping(const std::vector& live_ranges) +{ + + std::vector result(live_ranges.size(), rename_reg_pair{false, false, 0}); + std::vector reg_access; + + for (unsigned i = 0; i < live_ranges.size(); ++i) { + if (live_ranges[i].begin >= 0) { + register_merge_record r; + r.begin = live_ranges[i].begin; + r.end = live_ranges[i].end; + r.is_array_elm = live_ranges[i].is_array_elm; + r.reg = i; + r.erase = false; + reg_access.push_back(r); + } + } + + std::sort(reg_access.begin(), reg_access.end()); + + for (auto& r : reg_access) + sfn_log << SfnLog::merge << "Use Range " <reg << "[" << trgt->begin << ", " << trgt->end << "]\n"; + + + auto src = upper_bound(search_start, reg_access_end, trgt->end, + [](int bound, const register_merge_record& m){ + return bound < m.begin && !m.is_array_elm;} + ); + + if (src != reg_access_end) { + result[src->reg].new_reg = trgt->reg; + result[src->reg].valid = true; + + sfn_log << SfnLog::merge << "Map " + << src->reg << "[" << src->begin << ", " << src->end << "] to " + << trgt->reg << "[" << trgt->begin << ", " << trgt->end << ":"; + trgt->end = src->end; + sfn_log << SfnLog::merge << trgt->end << "]\n"; + + /* Since we only search forward, don't remove the renamed + * register just now, only mark it. */ + src->erase = true; + + if (first_erase == reg_access_end) + first_erase = src; + + search_start = src + 1; + } else { + /* Moving to the next target register it is time to remove + * the already merged registers from the search range */ + if (first_erase != reg_access_end) { + auto outp = first_erase; + auto inp = first_erase + 1; + + while (inp != reg_access_end) { + if (!inp->erase) + *outp++ = *inp; + ++inp; + } + + reg_access_end = outp; + first_erase = reg_access_end; + } + ++trgt; + search_start = trgt + 1; + } + } + return result; +} + +} // end ns r600 diff --git a/src/gallium/drivers/r600/sfn/sfn_liverange.h b/src/gallium/drivers/r600/sfn/sfn_liverange.h new file mode 100644 index 00000000000..d3480ab85ca --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_liverange.h @@ -0,0 +1,314 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_LIVERANGE_H +#define SFN_LIVERANGE_H + +#include +#include +#include +#include + +#include "sfn_instruction_base.h" +#include "sfn_nir.h" + +namespace r600 { + +/** Storage to record the required live range of a temporary register + * begin == end == -1 indicates that the register can be reused without + * limitations. Otherwise, "begin" indicates the first instruction in which + * a write operation may target this temporary, and end indicates the + * last instruction in which a value can be read from this temporary. + * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin. + */ +struct register_live_range { + int begin; + int end; + bool is_array_elm; +}; + +enum prog_scope_type { + outer_scope, /* Outer program scope */ + loop_body, /* Inside a loop */ + if_branch, /* Inside if branch */ + else_branch, /* Inside else branch */ + switch_body, /* Inside switch statmenet */ + switch_case_branch, /* Inside switch case statmenet */ + switch_default_branch, /* Inside switch default statmenet */ + undefined_scope +}; + +class prog_scope { +public: + prog_scope(); + prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int begin); + + prog_scope_type type() const; + prog_scope *parent() const; + int nesting_depth() const; + int id() const; + int end() const; + int begin() const; + int loop_break_line() const; + + const prog_scope *in_else_scope() const; + const prog_scope *in_ifelse_scope() const; + const prog_scope *in_parent_ifelse_scope() const; + const prog_scope *innermost_loop() const; + const prog_scope *outermost_loop() const; + const prog_scope *enclosing_conditional() const; + + bool is_loop() const; + bool is_in_loop() const; + bool is_switchcase_scope_in_loop() const; + bool is_conditional() const; + bool is_child_of(const prog_scope *scope) const; + bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const; + + bool break_is_for_switchcase() const; + bool contains_range_of(const prog_scope& other) const; + + void set_end(int end); + void set_loop_break_line(int line); + +private: + prog_scope_type scope_type; + int scope_id; + int scope_nesting_depth; + int scope_begin; + int scope_end; + int break_loop_line; + prog_scope *parent_scope; +}; + +/* Some storage class to encapsulate the prog_scope (de-)allocations */ +class prog_scope_storage { +public: + prog_scope_storage(int n); + ~prog_scope_storage(); + prog_scope * create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin); +private: + int current_slot; + std::vector storage; +}; + +/* Class to track the access to a component of a temporary register. */ + +class temp_comp_access { +public: + temp_comp_access(); + + void record_read(int line, prog_scope *scope); + void record_write(int line, prog_scope *scope); + register_live_range get_required_live_range(); +private: + void propagate_live_range_to_dominant_write_scope(); + bool conditional_ifelse_write_in_loop() const; + + void record_ifelse_write(const prog_scope& scope); + void record_if_write(const prog_scope& scope); + void record_else_write(const prog_scope& scope); + + prog_scope *last_read_scope; + prog_scope *first_read_scope; + prog_scope *first_write_scope; + + int first_write; + int last_read; + int last_write; + int first_read; + + /* This member variable tracks the current resolution of conditional writing + * to this temporary in IF/ELSE clauses. + * + * The initial value "conditionality_untouched" indicates that this + * temporary has not yet been written to within an if clause. + * + * A positive (other than "conditionality_untouched") number refers to the + * last loop id for which the write was resolved as unconditional. With each + * new loop this value will be overwitten by "conditionality_unresolved" + * on entering the first IF clause writing this temporary. + * + * The value "conditionality_unresolved" indicates that no resolution has + * been achieved so far. If the variable is set to this value at the end of + * the processing of the whole shader it also indicates a conditional write. + * + * The value "write_is_conditional" marks that the variable is written + * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at + * least one loop. + */ + int conditionality_in_loop_id; + + /* Helper constants to make the tracking code more readable. */ + static const int write_is_conditional = -1; + static const int conditionality_unresolved = 0; + static const int conditionality_untouched; + static const int write_is_unconditional; + + /* A bit field tracking the nexting levels of if-else clauses where the + * temporary has (so far) been written to in the if branch, but not in the + * else branch. + */ + unsigned int if_scope_write_flags; + + int next_ifelse_nesting_depth; + static const int supported_ifelse_nesting_depth = 32; + + /* Tracks the last if scope in which the temporary was written to + * without a write in the correspondig else branch. Is also used + * to track read-before-write in the according scope. + */ + const prog_scope *current_unpaired_if_write_scope; + + /* Flag to resolve read-before-write in the else scope. */ + bool was_written_in_current_else_scope; +}; + +/* Class to track the access to all components of a temporary register. */ +class temp_access { +public: + temp_access(); + void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm); + void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm); + register_live_range get_required_live_range(); +private: + void update_access_mask(int mask); + + temp_comp_access comp[4]; + int access_mask; + bool needs_component_tracking; + bool is_array_element; +}; + +/* Helper class to merge the live ranges of an arrays. + * + * For arrays the array length, live range, and component access needs to + * be kept, because when live ranges are merged or arrays are interleaved + * one can only merge or interleave an array into another with equal or more + * elements. For interleaving it is also required that the sum of used swizzles + * is at most four. + */ + +class array_live_range { +public: + array_live_range(); + array_live_range(unsigned aid, unsigned alength); + array_live_range(unsigned aid, unsigned alength, int first_access, + int last_access, int mask); + + void set_live_range(int first_access, int last_access); + void set_begin(int _begin){first_access = _begin;} + void set_end(int _end){last_access = _end;} + void set_access_mask(int s); + + static void merge(array_live_range *a, array_live_range *b); + static void interleave(array_live_range *a, array_live_range *b); + + int array_id() const {return id;} + int target_array_id() const {return target_array ? target_array->id : 0;} + const array_live_range *final_target() const {return target_array ? + target_array->final_target() : this;} + unsigned array_length() const { return length;} + int begin() const { return first_access;} + int end() const { return last_access;} + int access_mask() const { return component_access_mask;} + int used_components() const {return used_component_count;} + + bool time_doesnt_overlap(const array_live_range& other) const; + + void print(std::ostream& os) const; + + bool is_mapped() const { return target_array != nullptr;} + + int8_t remap_one_swizzle(int8_t idx) const; + +private: + void init_swizzles(); + void set_target(array_live_range *target); + void merge_live_range_from(array_live_range *other); + void interleave_into(array_live_range *other); + + unsigned id; + unsigned length; + int first_access; + int last_access; + uint8_t component_access_mask; + uint8_t used_component_count; + array_live_range *target_array; + int8_t swizzle_map[4]; +}; + + + +class LiverangeEvaluator { +public: + LiverangeEvaluator(); + + void run(const Shader& shader, + std::vector ®ister_live_ranges); + + void scope_if(); + void scope_else(); + void scope_endif(); + void scope_loop_begin(); + void scope_loop_end(); + void scope_loop_break(); + + void record_read(const Value& src, bool is_array_elm = false); + void record_write(const Value& dst, bool is_array_elm = false); + + void record_read(const GPRVector& src); + void record_write(const GPRVector& dst); + +private: + + prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id, + int lvl, int s_begin); + + + void get_required_live_ranges(std::vector& register_live_ranges); + + int line; + int loop_id; + int if_id; + int switch_id; + bool is_at_end; + int n_scopes; + std::unique_ptr scopes; + prog_scope *cur_scope; + + std::vector temp_acc; + +}; + +std::vector +get_temp_registers_remapping(const std::vector& live_ranges); + +} // end namespace r600 + +#endif diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index ef368cd1d4e..3bf430438c9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -121,6 +121,10 @@ bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shade sfn_log << SfnLog::trans << "Finalize\n"; impl->finalize(); + if (!sfn_log.has_debug_flag(SfnLog::nomerge)) { + sfn_log << SfnLog::trans << "Merge registers\n"; + impl->remap_registers(); + } sfn_log << SfnLog::trans << "Finished translating to R600 IR\n"; return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp index 04e1ee58444..a0b0d8b7f87 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -29,6 +29,7 @@ #include "sfn_shader_vertex.h" #include "sfn_shader_fragment.h" +#include "sfn_liverange.h" #include "sfn_ir_to_assembly.h" #include "sfn_nir.h" #include "sfn_instruction_misc.h" @@ -86,6 +87,80 @@ bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) return scan_sysvalue_access(instr); } +static void remap_shader_info(r600_shader& sh_info, + std::vector& map, + UNUSED ValueMap& values) +{ + for (unsigned i = 0; i < sh_info.ninput; ++i) { + sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr + << " of map.size()\n"; + + assert(sh_info.input[i].gpr < map.size()); + auto new_index = map[sh_info.input[i].gpr]; + if (new_index.valid) + sh_info.input[i].gpr = new_index.new_reg; + map[sh_info.input[i].gpr].used = true; + } + + for (unsigned i = 0; i < sh_info.noutput; ++i) { + assert(sh_info.output[i].gpr < map.size()); + auto new_index = map[sh_info.output[i].gpr]; + if (new_index.valid) + sh_info.output[i].gpr = new_index.new_reg; + map[sh_info.output[i].gpr].used = true; + } +} + +void ShaderFromNirProcessor::remap_registers() +{ + // register renumbering + auto rc = register_count(); + if (!rc) + return; + + std::vector register_live_ranges(rc); + + auto temp_register_map = get_temp_registers(); + + Shader sh{m_output, temp_register_map}; + LiverangeEvaluator().run(sh, register_live_ranges); + auto register_map = get_temp_registers_remapping(register_live_ranges); + + sfn_log << SfnLog::merge << "=========Mapping===========\n"; + for (size_t i = 0; i < register_map.size(); ++i) + if (register_map[i].valid) + sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n"; + + + ValueRemapper vmap0(register_map, temp_register_map); + for (auto ir: m_output) + ir->remap_registers(vmap0); + + remap_shader_info(m_sh_info, register_map, temp_register_map); + + /* Mark inputs as used registers, these registers should no be remapped */ + for (auto& v: sh.m_temp) { + if (v.second->type() == Value::gpr) { + const auto& g = static_cast(*v.second); + if (g.is_input()) + register_map[g.sel()].used = true; + } + } + + int new_index = 0; + for (auto& i : register_map) { + i.valid = i.used; + if (i.used) + i.new_reg = new_index++; + } + + ValueRemapper vmap1(register_map, temp_register_map); + for (auto ir: m_output) + ir->remap_registers(vmap1); + + remap_shader_info(m_sh_info, register_map, temp_register_map); +} + bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) { // m_uniform_type_map diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp index fab4837cc77..4eb5bfb18c4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp @@ -27,6 +27,7 @@ #include "sfn_value_gpr.h" #include "sfn_valuepool.h" #include "sfn_debug.h" +#include "sfn_liverange.h" namespace r600 { @@ -218,6 +219,28 @@ bool GPRArrayValue::is_equal_to(const Value& other) const *m_array == *v.m_array; } +void GPRArrayValue::record_read(LiverangeEvaluator& ev) const +{ + if (m_addr) { + ev.record_read(*m_addr); + unsigned chan = m_value->chan(); + assert(m_array); + m_array->record_read(ev, chan); + } else + ev.record_read(*m_value); +} + +void GPRArrayValue::record_write(LiverangeEvaluator& ev) const +{ + if (m_addr) { + ev.record_read(*m_addr); + unsigned chan = m_value->chan(); + assert(m_array); + m_array->record_write(ev, chan); + } else + ev.record_write(*m_value); +} + void GPRArrayValue::reset_value(PValue new_value) { m_value = new_value; @@ -305,6 +328,18 @@ PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned componen return v; } +void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_read(*v.reg_i(chan), true); +} + +void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_write(*v.reg_i(chan), true); +} + void GPRArray::collect_registers(ValueMap& output) const { for (auto& v: m_values) { diff --git a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h index 2faf84aac1f..eee12c46c6a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_value_gpr.h +++ b/src/gallium/drivers/r600/sfn/sfn_value_gpr.h @@ -34,6 +34,7 @@ namespace r600 { class ValuePool; class ValueMap; +class LiverangeEvaluator; class GPRValue : public Value { public: @@ -121,6 +122,9 @@ public: PValue get_indirect(unsigned index, PValue indirect, unsigned component); + void record_read(LiverangeEvaluator& ev, int chan)const; + void record_write(LiverangeEvaluator& ev, int chan)const; + void collect_registers(ValueMap& output) const; private: @@ -142,6 +146,9 @@ public: GPRArrayValue(PValue value, GPRArray *array); GPRArrayValue(PValue value, PValue index, GPRArray *array); + void record_read(LiverangeEvaluator& ev) const; + void record_write(LiverangeEvaluator& ev) const; + size_t array_size() const; uint32_t sel() const override; -- 2.30.2