From: Gert Wollny Date: Fri, 27 Dec 2019 16:49:26 +0000 (+0100) Subject: r600/sfn: Add lowering arrays to scratch and according instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=31a4dd6451eec9cf96bec6d211e8e9b9f8032706;p=mesa.git r600/sfn: Add lowering arrays to scratch and according instructions Make use of the scratch space for arrays that are larger than 100 elements. Since for IO r600 is vector based, there is a bit of a scratch space waste here for arrays that use types smaller than vec4. Signed-off-by: Gert Wollny Part-of: --- diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp index d3a07713fa7..fffcb09537c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp @@ -100,6 +100,98 @@ void ExportInstruction::set_last() m_is_last = true; } +WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value, + int align, int align_offset, int writemask): + WriteoutInstruction (Instruction::mem_wr_scratch, value), + m_loc(loc), + m_align(align), + m_align_offset(align_offset), + m_writemask(writemask), + m_array_size(0) +{ +} + +WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value, + int align, int align_offset, int writemask, int array_size): + WriteoutInstruction (Instruction::mem_wr_scratch, value), + m_loc(0), + m_address(address), + m_align(align), + m_align_offset(align_offset), + m_writemask(writemask), + m_array_size(array_size - 1) +{ + add_remappable_src_value(&m_address); +} + +bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != Instruction::mem_wr_scratch) + return false; + const auto& other = dynamic_cast(lhs); + + if (m_address) { + if (!other.m_address) + return false; + if (*m_address != *other.m_address) + return false; + } else { + if (other.m_address) + return false; + } + 
+ return gpr() == other.gpr() && + m_loc == other.m_loc && + m_align == other.m_align && + m_align_offset == other.m_align_offset && + m_writemask == other.m_writemask; +} + +static char *writemask_to_swizzle(int writemask, char *buf) +{ + const char *swz = "xyzw"; + for (int i = 0; i < 4; ++i) { + buf[i] = (writemask & (1 << i)) ? swz[i] : '_'; + } + return buf; +} + +void WriteScratchInstruction::do_print(std::ostream& os) const +{ + char buf[5]; + + os << "MEM_SCRATCH_WRITE "; + if (m_address) + os << "@" << *m_address << "+"; + + os << m_loc << "." << writemask_to_swizzle(m_writemask, buf) + << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset; +} + +void WriteScratchInstruction::replace_values_child(const ValueSet& candiates, PValue new_value) +{ + if (!m_address) + return; + + for (auto c: candiates) { + if (*c == *m_address) + m_address = new_value; + } +} + +void WriteScratchInstruction::remap_registers_child(std::vector& map, + ValueMap& values) +{ + if (!m_address) + return; + sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n"; + assert(m_address->type() == Value::gpr); + auto new_index = map[m_address->sel()]; + if (new_index.valid) + m_address = values.get_or_inject(new_index.new_reg, m_address->chan()); + map[m_address->sel()].used = true; +} + StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components, int array_base, int comp_mask, int out_buffer, int stream): diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h index 1971e339135..f32800381c4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_export.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_export.h @@ -73,6 +73,37 @@ private: bool m_is_last; }; +class WriteScratchInstruction : public WriteoutInstruction { +public: + + WriteScratchInstruction(unsigned loc, const GPRVector& value, int align, + int align_offset, int writemask); + 
WriteScratchInstruction(const PValue& address, const GPRVector& value, + int align, int align_offset, int writemask, int array_size); + unsigned location() const {return m_loc;} + + int write_mask() const { return m_writemask;} + int address() const { assert(m_address); return m_address->sel();} + bool indirect() const { return !!m_address;} + int array_size() const { return m_array_size;} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + void replace_values_child(const ValueSet& candiates, PValue new_value) override; + void remap_registers_child(std::vector& map, + ValueMap& values)override; + + unsigned m_loc; + PValue m_address; + unsigned m_align; + unsigned m_align_offset; + unsigned m_writemask; + int m_array_size; +}; + + class StreamOutIntruction: public WriteoutInstruction { public: StreamOutIntruction(const GPRVector& value, int num_components, diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp index c41692639b9..e51bb588969 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp @@ -296,6 +296,11 @@ const std::vector& FetchInstruction::prelude() const return m_prelude; } +LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size): + FetchInstruction(dst, src, scratch_size) +{ +} + static const char *fmt_descr[64] = { "INVALID", "8", diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h index 369094edfa7..465f604fed5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h +++ b/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h @@ -164,6 +164,11 @@ private: std::vector m_prelude; }; +class LoadFromScratch: public FetchInstruction { +public: + LoadFromScratch(GPRVector dst, PValue src, int scratch_size); +}; + } #endif // 
SFN_INSTRUCTION_FETCH_H diff --git a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp index b8ceb7ff0c2..e9624c58b2b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp @@ -58,6 +58,7 @@ private: bool emit_loop_break(const LoopBreakInstruction& instr); bool emit_loop_continue(const LoopContInstruction& instr); bool emit_wait_ack(const WaitAck& instr); + bool emit_wr_scratch(const WriteScratchInstruction& instr); bool emit_load_addr(PValue addr); bool emit_fs_pixel_export(const ExportInstruction & exi); @@ -165,6 +166,8 @@ bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i) return emit_streamout(static_cast(*i)); case Instruction::wait_ack: return emit_wait_ack(static_cast(*i)); + case Instruction::mem_wr_scratch: + return emit_wr_scratch(static_cast(*i)); default: return false; } @@ -749,6 +752,45 @@ bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr) return r == 0; } +bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr) +{ + struct r600_bytecode_output cf; + + memset(&cf, 0, sizeof(struct r600_bytecode_output)); + + cf.op = CF_OP_MEM_SCRATCH; + cf.elem_size = 3; + cf.gpr = instr.gpr().sel(); + cf.mark = 1; + cf.comp_mask = instr.write_mask(); + cf.swizzle_x = 0; + cf.swizzle_y = 1; + cf.swizzle_z = 2; + cf.swizzle_w = 3; + cf.burst_count = 1; + + if (instr.indirect()) { + cf.type = 3; + cf.index_gpr = instr.address(); + + /* The docu seems to be wrong here: In indirect addressing the + * address_base seems to be the array_size */ + cf.array_size = instr.array_size(); + } else { + cf.type = 2; + cf.array_base = instr.location(); + } + /* This should be 0, but the address calculation is apparently wrong */ + + + if (r600_bytecode_add_output(m_bc, &cf)){ + R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); + return false; + } + + return true; 
+} + extern const std::map ds_opcode_map; bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst, diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 3bf430438c9..7eb025f46a4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -330,10 +330,56 @@ bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader) nullptr); }; +static void +r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr) +{ + b->cursor = nir_before_instr(&instr->instr); + + int address_index = 0; + int align; + + if (instr->intrinsic == nir_intrinsic_store_scratch) { + align = instr->src[0].ssa->num_components; + address_index = 1; + } else{ + align = instr->dest.ssa.num_components; + } + + nir_ssa_def *address = instr->src[address_index].ssa; + nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align)); + + nir_instr_rewrite_src(&instr->instr, &instr->src[address_index], + nir_src_for_ssa(new_address)); +} + +bool r600_lower_scratch_addresses(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + nir_builder build; + nir_builder_init(&build, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + if (op->intrinsic != nir_intrinsic_load_scratch && + op->intrinsic != nir_intrinsic_store_scratch) + continue; + r600_nir_lower_scratch_address_impl(&build, op); + progress = true; + } + } + } + return progress; +} + } using r600::r600_nir_lower_int_tg4; using r600::r600_nir_lower_pack_unpack_2x16; +using r600::r600_lower_scratch_addresses; using r600::r600_lower_fs_out_to_vector; int @@ -463,6 +509,11 @@ int r600_shader_from_nir(struct r600_context *rctx, if (optimize) while(optimize_once(sel->nir)); + NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch, + 
nir_var_function_temp, + 100, + r600_get_natural_size_align_bytes); + while (optimize && optimize_once(sel->nir)); NIR_PASS_V(sel->nir, nir_lower_locals_to_regs); @@ -482,6 +533,7 @@ int r600_shader_from_nir(struct r600_context *rctx, } memset(&pipeshader->shader, 0, sizeof(r600_shader)); + pipeshader->scratch_space_needed = sel->nir->scratch_size; if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL || sel->nir->info.stage == MESA_SHADER_VERTEX || diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp index a0b0d8b7f87..d1f3f2feba0 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -55,12 +55,13 @@ using namespace std; ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, - r600_shader &sh_info): + r600_shader &sh_info, int scratch_size): m_processor_type(ptype), m_sh_info(sh_info), m_tex_instr(*this), m_alu_instr(*this), m_pending_else(nullptr), + m_scratch_size(scratch_size), m_next_hwatomic_loc(0), m_sel(sel) { @@ -433,6 +434,10 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins return false; } } + case nir_intrinsic_store_scratch: + return emit_store_scratch(instr); + case nir_intrinsic_load_scratch: + return emit_load_scratch(instr); case nir_intrinsic_store_deref: return emit_store_deref(instr); case nir_intrinsic_load_uniform: @@ -477,6 +482,47 @@ bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan return true; } +bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir(instr->src[1], 0, 0); + + std::unique_ptr vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1, + swizzle_from_mask(instr->num_components))); + GPRVector value(*vec); + + int writemask = nir_intrinsic_write_mask(instr); + int align = nir_intrinsic_align_mul(instr); + int 
align_offset = nir_intrinsic_align_offset(instr); + + WriteScratchInstruction *ir = nullptr; + if (address->type() == Value::literal) { + const auto& lv = dynamic_cast(*address); + ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask); + } else { + address = from_nir_with_fetch_constant(instr->src[1], 0); + ir = new WriteScratchInstruction(address, value, align, align_offset, + writemask, m_scratch_size); + } + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir_with_fetch_constant(instr->src[0], 0); + std::array dst_val; + for (int i = 0; i < 4; ++i) + dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7); + + GPRVector dst(dst_val); + auto ir = new LoadFromScratch(dst, address, m_scratch_size); + ir->prelude_append(new WaitAck(0)); + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, UNUSED unsigned mask, const GPRVector::Swizzle& swizzle) diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h index b9184fda2fb..3cd9b971e4a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h @@ -54,7 +54,7 @@ extern SfnLog sfn_log; class ShaderFromNirProcessor : public ValuePool { public: ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, - r600_shader& sh_info); + r600_shader& sh_info, int scratch_size); virtual ~ShaderFromNirProcessor(); void emit_instruction(Instruction *ir); @@ -147,6 +147,8 @@ private: virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0; virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0; + bool emit_store_scratch(nir_intrinsic_instr* instr); + 
bool emit_load_scratch(nir_intrinsic_instr* instr); virtual void do_finalize() = 0; void finalize(); @@ -175,6 +177,7 @@ private: OutputRegisterMap m_output_register_map; IfElseInstruction *m_pending_else; + int m_scratch_size; int m_next_hwatomic_loc; r600_pipe_shader_selector& m_sel; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp index 7b0e4c998d2..270cb96d9a4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp @@ -35,7 +35,7 @@ FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh, r600_pipe_shader_selector &sel, const r600_shader_key &key): - ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh), + ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size), m_max_color_exports(MAX2(key.ps.nr_cbufs,1)), m_max_counted_color_exports(0), m_two_sided_color(key.ps.color_two_side), diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp index 7eb67f46c83..e67cd4638f3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp @@ -39,7 +39,8 @@ using std::priority_queue; VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key): - ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader), + ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader, + sh->scratch_space_needed), m_num_clip_dist(0), m_last_param_export(nullptr), m_last_pos_export(nullptr),