Make use of the scratch space for arrays that are larger than 100 elements.
Since for IO r600 is vector based, there is a bit of a scratch space waste
here for arrays that use types smaller than vec4.
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3225>
m_is_last = true;
}
+/* Create a scratch write that targets the fixed location `loc`.
+ * m_address is not initialized here, i.e. it stays default-constructed
+ * (presumably empty, so indirect() reports false - confirm against PValue),
+ * and the array size is stored as zero. */
+WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
+ int align, int align_offset, int writemask):
+ WriteoutInstruction (Instruction::mem_wr_scratch, value),
+ m_loc(loc),
+ m_align(align),
+ m_align_offset(align_offset),
+ m_writemask(writemask),
+ m_array_size(0)
+{
+}
+
+/* Create a scratch write whose location is taken from the value `address`.
+ * The location is fixed to zero and the array size is stored as
+ * `array_size - 1`. */
+WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
+ int align, int align_offset, int writemask, int array_size):
+ WriteoutInstruction (Instruction::mem_wr_scratch, value),
+ m_loc(0),
+ m_address(address),
+ m_align(align),
+ m_align_offset(align_offset),
+ m_writemask(writemask),
+ m_array_size(array_size - 1)
+{
+ /* Register the address as a source value that may be remapped
+ * (presumably consumed by the register renaming code - confirm). */
+ add_remappable_src_value(&m_address);
+}
+
+/* Return true when `lhs` is a scratch write with the same address value
+ * (or both without one), source GPR vector, location, alignment, write
+ * mask and array size. */
+bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != Instruction::mem_wr_scratch)
+ return false;
+ const auto& other = dynamic_cast<const WriteScratchInstruction&>(lhs);
+
+ /* Either both instructions carry an address and the addresses are equal,
+ * or neither carries one. */
+ if (m_address) {
+ if (!other.m_address)
+ return false;
+ if (*m_address != *other.m_address)
+ return false;
+ } else {
+ if (other.m_address)
+ return false;
+ }
+
+ /* m_array_size is part of the comparison because it ends up in the
+ * emitted instruction for indirect writes. */
+ return gpr() == other.gpr() &&
+ m_loc == other.m_loc &&
+ m_align == other.m_align &&
+ m_align_offset == other.m_align_offset &&
+ m_writemask == other.m_writemask &&
+ m_array_size == other.m_array_size;
+}
+
+/* Render the low four bits of `writemask` as a NUL-terminated string:
+ * bit i set yields the i-th character of "xyzw", a cleared bit yields '_'.
+ * `buf` must provide room for at least five chars; it is returned so the
+ * call can be used directly in a stream expression. */
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+ const char *swz = "xyzw";
+ for (int i = 0; i < 4; ++i) {
+ buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+ }
+ /* The result is printed as a C string, so it has to be terminated;
+ * the caller's buffer is not guaranteed to be zeroed. */
+ buf[4] = '\0';
+ return buf;
+}
+
+/* Print the instruction as
+ * "MEM_SCRATCH_WRITE [@<address>+]<loc>.<mask> <gpr> AL:<align> ALO:<align offset>". */
+void WriteScratchInstruction::do_print(std::ostream& os) const
+{
+ /* Four mask characters plus the terminator. The buffer is zeroed so
+ * that streaming it as a C string never reads indeterminate bytes. */
+ char buf[5] = {};
+
+ os << "MEM_SCRATCH_WRITE ";
+ if (m_address)
+ os << "@" << *m_address << "+";
+
+ os << m_loc << "." << writemask_to_swizzle(m_writemask, buf)
+ << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+/* Replace the address value by `new_value` if it compares equal to one of
+ * the candidates. Nothing is done when the instruction has no address. */
+void WriteScratchInstruction::replace_values_child(const ValueSet& candiates, PValue new_value)
+{
+ if (!m_address)
+ return;
+
+ /* The loop does not stop at the first match, so later candidates are
+ * compared against the already replaced address. */
+ for (auto c: candiates) {
+ if (*c == *m_address)
+ m_address = new_value;
+ }
+}
+
+/* Apply the register rename table `map` to the address value and mark the
+ * resulting register as used. Nothing is done when there is no address. */
+void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values)
+{
+ if (!m_address)
+ return;
+ sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
+ assert(m_address->type() == Value::gpr);
+ /* NOTE(review): map is indexed with sel() without a bounds check;
+ * presumably the table covers every register - confirm. */
+ auto new_index = map[m_address->sel()];
+ if (new_index.valid)
+ m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
+ /* m_address may have been re-bound above, so this marks the register
+ * that is referenced after the rename. */
+ map[m_address->sel()].used = true;
+}
+
StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
int array_base, int comp_mask, int out_buffer,
int stream):
bool m_is_last;
};
+/* Instruction that writes a GPR vector to scratch memory, either to a
+ * fixed location or to a location given by an address value. */
+class WriteScratchInstruction : public WriteoutInstruction {
+public:
+
+ /* Write to the fixed location `loc`. */
+ WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
+ int align_offset, int writemask);
+ /* Write to the location given by `address`; `array_size - 1` is stored
+ * as the array size. */
+ WriteScratchInstruction(const PValue& address, const GPRVector& value,
+ int align, int align_offset, int writemask, int array_size);
+ /* Fixed location; zero for instructions created with an address. */
+ unsigned location() const {return m_loc;}
+
+ int write_mask() const { return m_writemask;}
+ /* Register index (sel) of the address value; only valid if indirect(). */
+ int address() const { assert(m_address); return m_address->sel();}
+ /* True when the instruction carries an address value. */
+ bool indirect() const { return !!m_address;}
+ /* Array size as stored by the constructors (0, or array_size - 1). */
+ int array_size() const { return m_array_size;}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ void replace_values_child(const ValueSet& candiates, PValue new_value) override;
+ void remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values)override;
+
+ unsigned m_loc;
+ PValue m_address;
+ /* The constructors take these three values as int. */
+ unsigned m_align;
+ unsigned m_align_offset;
+ unsigned m_writemask;
+ int m_array_size;
+};
+
+
class StreamOutIntruction: public WriteoutInstruction {
public:
StreamOutIntruction(const GPRVector& value, int num_components,
return m_prelude;
}
+/* Forward destination, source address and scratch size unchanged to the
+ * FetchInstruction constructor; no additional state is set up here. */
+LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size):
+ FetchInstruction(dst, src, scratch_size)
+{
+}
+
static const char *fmt_descr[64] = {
"INVALID",
"8",
std::vector<PInstruction> m_prelude;
};
+/* Fetch instruction that reads a vector from scratch memory. It adds no
+ * members; it only provides a dedicated constructor. */
+class LoadFromScratch: public FetchInstruction {
+public:
+ LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
+};
+
}
#endif // SFN_INSTRUCTION_FETCH_H
bool emit_loop_break(const LoopBreakInstruction& instr);
bool emit_loop_continue(const LoopContInstruction& instr);
bool emit_wait_ack(const WaitAck& instr);
+ bool emit_wr_scratch(const WriteScratchInstruction& instr);
bool emit_load_addr(PValue addr);
bool emit_fs_pixel_export(const ExportInstruction & exi);
return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
case Instruction::wait_ack:
return emit_wait_ack(static_cast<const WaitAck&>(*i));
+ case Instruction::mem_wr_scratch:
+ return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
default:
return false;
}
return r == 0;
}
+/* Translate a WriteScratchInstruction into a CF_OP_MEM_SCRATCH output and
+ * add it to the bytecode. Returns false if the bytecode builder reports an
+ * error, true otherwise. */
+bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
+{
+ struct r600_bytecode_output cf;
+
+ memset(&cf, 0, sizeof(struct r600_bytecode_output));
+
+ cf.op = CF_OP_MEM_SCRATCH;
+ /* Presumably "four elements", i.e. a full vec4, encoded as size - 1 -
+ * TODO confirm against the ISA documentation. */
+ cf.elem_size = 3;
+ cf.gpr = instr.gpr().sel();
+ cf.mark = 1;
+ cf.comp_mask = instr.write_mask();
+ /* Identity swizzle: components are written in x, y, z, w order. */
+ cf.swizzle_x = 0;
+ cf.swizzle_y = 1;
+ cf.swizzle_z = 2;
+ cf.swizzle_w = 3;
+ cf.burst_count = 1;
+
+ if (instr.indirect()) {
+ /* Presumably the indirect write type - TODO confirm the encoding. */
+ cf.type = 3;
+ cf.index_gpr = instr.address();
+
+ /* The documentation seems to be wrong here: In indirect addressing the
+ * address_base seems to be the array_size */
+ cf.array_size = instr.array_size();
+ } else {
+ /* Presumably the direct write type - TODO confirm the encoding. */
+ cf.type = 2;
+ cf.array_base = instr.location();
+ }
+ /* This should be 0, but the address calculation is apparently wrong */
+ /* NOTE(review): the comment above is not followed by any statement, so
+ * it is unclear which field it refers to. */
+
+
+ if (r600_bytecode_add_output(m_bc, &cf)){
+ R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
+ return false;
+ }
+
+ return true;
+}
+
extern const std::map<ESDOp, int> ds_opcode_map;
bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
nullptr);
};
+/* Rewrite the address source of one load_scratch/store_scratch intrinsic:
+ * the original address is shifted right by `4 * align`, where `align` is
+ * the number of components of the stored value (store) or of the
+ * destination (load). The new instructions are inserted before `instr`. */
+static void
+r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
+{
+ b->cursor = nir_before_instr(&instr->instr);
+
+ int address_index = 0;
+ int align;
+
+ if (instr->intrinsic == nir_intrinsic_store_scratch) {
+ /* For stores src[0] is the value and src[1] the address. */
+ align = instr->src[0].ssa->num_components;
+ address_index = 1;
+ } else{
+ align = instr->dest.ssa.num_components;
+ }
+
+ nir_ssa_def *address = instr->src[address_index].ssa;
+ /* NOTE(review): 4 * align is used as a shift count here, not as a
+ * divisor; if the intent is to convert a byte offset into an index of
+ * (4 * align)-byte slots this looks suspicious - confirm. */
+ nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));
+
+ nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
+ nir_src_for_ssa(new_address));
+}
+
+/* Run the scratch address lowering on every load_scratch/store_scratch
+ * intrinsic in the shader. Returns true if at least one intrinsic was
+ * rewritten. */
+bool r600_lower_scratch_addresses(nir_shader *shader)
+{
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ /* Functions without an implementation have no blocks to walk. */
+ if (!function->impl)
+ continue;
+
+ nir_builder build;
+ nir_builder_init(&build, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+ if (op->intrinsic != nir_intrinsic_load_scratch &&
+ op->intrinsic != nir_intrinsic_store_scratch)
+ continue;
+ r600_nir_lower_scratch_address_impl(&build, op);
+ progress = true;
+ }
+ }
+ }
+ return progress;
+}
+
}
using r600::r600_nir_lower_int_tg4;
using r600::r600_nir_lower_pack_unpack_2x16;
+using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
int
if (optimize)
while(optimize_once(sel->nir));
+ NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
+ nir_var_function_temp,
+ 100,
+ r600_get_natural_size_align_bytes);
+
while (optimize && optimize_once(sel->nir));
NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
}
memset(&pipeshader->shader, 0, sizeof(r600_shader));
+ pipeshader->scratch_space_needed = sel->nir->scratch_size;
if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
sel->nir->info.stage == MESA_SHADER_VERTEX ||
ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
r600_pipe_shader_selector& sel,
- r600_shader &sh_info):
+ r600_shader &sh_info, int scratch_size):
m_processor_type(ptype),
m_sh_info(sh_info),
m_tex_instr(*this),
m_alu_instr(*this),
m_pending_else(nullptr),
+ m_scratch_size(scratch_size),
m_next_hwatomic_loc(0),
m_sel(sel)
{
return false;
}
}
+ case nir_intrinsic_store_scratch:
+ return emit_store_scratch(instr);
+ case nir_intrinsic_load_scratch:
+ return emit_load_scratch(instr);
case nir_intrinsic_store_deref:
return emit_store_deref(instr);
case nir_intrinsic_load_uniform:
return true;
}
+/* Emit a scratch write for a store_scratch intrinsic. src[0] is the value
+ * to store, src[1] the address. A literal address results in a write to a
+ * fixed location, any other address in an address-based write that also
+ * gets m_scratch_size. Always returns true. */
+bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
+{
+ PValue address = from_nir(instr->src[1], 0, 0);
+
+ /* The unique_ptr only owns the vector returned by the helper until it
+ * has been copied into `value`. */
+ std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
+ swizzle_from_mask(instr->num_components)));
+ GPRVector value(*vec);
+
+ int writemask = nir_intrinsic_write_mask(instr);
+ int align = nir_intrinsic_align_mul(instr);
+ int align_offset = nir_intrinsic_align_offset(instr);
+
+ WriteScratchInstruction *ir = nullptr;
+ if (address->type() == Value::literal) {
+ const auto& lv = dynamic_cast<const LiteralValue&>(*address);
+ ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
+ } else {
+ /* Re-read the address through the fetch-constant path (presumably so
+ * that it ends up in a register - confirm). */
+ address = from_nir_with_fetch_constant(instr->src[1], 0);
+ ir = new WriteScratchInstruction(address, value, align, align_offset,
+ writemask, m_scratch_size);
+ }
+ /* Presumably emit_instruction takes ownership of `ir` - confirm. */
+ emit_instruction(ir);
+ sh_info().needs_scratch_space = 1;
+ return true;
+}
+
+/* Emit a scratch read for a load_scratch intrinsic. src[0] is the address;
+ * the result is written to a four-component destination vector. Always
+ * returns true. */
+bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
+{
+ PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
+ std::array<PValue, 4> dst_val;
+ /* Components beyond num_components are mapped to index 7 (presumably an
+ * "unused" channel - confirm). */
+ for (int i = 0; i < 4; ++i)
+ dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
+
+ GPRVector dst(dst_val);
+ auto ir = new LoadFromScratch(dst, address, m_scratch_size);
+ /* A WaitAck is placed in the prelude of the load (presumably to wait
+ * for outstanding scratch writes - confirm). */
+ ir->prelude_append(new WaitAck(0));
+ emit_instruction(ir);
+ sh_info().needs_scratch_space = 1;
+ return true;
+}
+
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
UNUSED unsigned mask,
const GPRVector::Swizzle& swizzle)
class ShaderFromNirProcessor : public ValuePool {
public:
ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
- r600_shader& sh_info);
+ r600_shader& sh_info, int scratch_size);
virtual ~ShaderFromNirProcessor();
void emit_instruction(Instruction *ir);
virtual bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) = 0;
virtual bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
+ bool emit_store_scratch(nir_intrinsic_instr* instr);
+ bool emit_load_scratch(nir_intrinsic_instr* instr);
virtual void do_finalize() = 0;
void finalize();
OutputRegisterMap m_output_register_map;
IfElseInstruction *m_pending_else;
+ int m_scratch_size;
int m_next_hwatomic_loc;
r600_pipe_shader_selector& m_sel;
r600_shader& sh,
r600_pipe_shader_selector &sel,
const r600_shader_key &key):
- ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh),
+ ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size),
m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
m_max_counted_color_exports(0),
m_two_sided_color(key.ps.color_two_side),
VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
r600_pipe_shader_selector& sel,
const r600_shader_key& key):
- ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader),
+ ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader,
+ sh->scratch_space_needed),
m_num_clip_dist(0),
m_last_param_export(nullptr),
m_last_pos_export(nullptr),