sfn/sfn_value_gpr.cpp \
sfn/sfn_value_gpr.h \
sfn/sfn_valuepool.cpp \
- sfn/sfn_valuepool.h
+ sfn/sfn_valuepool.h \
+ sfn/sfn_vertexstageexport.cpp \
+ sfn/sfn_vertexstageexport.h
R600_GENERATED_FILES = \
egd_tables.h
'sfn/sfn_value_gpr.h',
'sfn/sfn_valuepool.cpp',
'sfn/sfn_valuepool.h',
+ 'sfn/sfn_vertexstageexport.cpp',
+ 'sfn/sfn_vertexstageexport.h',
)
egd_tables_h = custom_target(
switch (shader->info.stage) {
case MESA_SHADER_VERTEX:
- if (key.vs.as_es) {
- sfn_log << SfnLog::trans << "Start VS for GS\n";
- impl.reset(new VertexShaderFromNirForGS(pipe_shader, *sel, key, gs_shader));
- } else if (key.vs.as_ls) {
- sfn_log << "VS: next type TCS and TES not yet supported\n";
- return false;
- } else {
- sfn_log << SfnLog::trans << "Start VS for FS\n";
- impl.reset(new VertexShaderFromNirForFS(pipe_shader, *sel, key));
- }
+ /* The export flavour (GS ring, LS or FS) is chosen inside the constructor
+ * from key.vs; the pre-existing 'break' that follows ends this case. */
+ impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader));
break;
case MESA_SHADER_GEOMETRY:
sfn_log << SfnLog::trans << "Start GS\n";
bool emit_store_local_shared(nir_intrinsic_instr* instr);
bool emit_barrier(nir_intrinsic_instr* instr);
- const GPRVector *output_register(unsigned location) const;
bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
bool as_last = true);
- void add_param_output_reg(int loc, const GPRVector *gpr);
+
void inc_atomic_file_count();
std::bitset<8> m_sv_values;
VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
r600_pipe_shader_selector& sel,
- const r600_shader_key& key):
- ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader,
- sh->scratch_space_needed),
+ const r600_shader_key& key,
+ struct r600_shader* gs_shader):
+ VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
+ sh->scratch_space_needed),
m_num_clip_dist(0),
m_last_param_export(nullptr),
m_last_pos_export(nullptr),
m_pipe_shader(sh),
m_enabled_stream_buffers_mask(0),
m_so_info(&sel.so),
- m_cur_param(0),
- m_cur_clip_pos(1),
m_vertex_id(),
m_key(key)
{
increment_reserved_registers();
sh_info().atomic_base = key.vs.first_atomic_counter;
+ sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
+
+ /* Pick the export back end from the shader key and trace which kind of
+ * vertex shader is being built. */
+ if (key.vs.as_es) {
+ sh->shader.vs_as_es = true;
+ sfn_log << SfnLog::trans << "Start VS for GS\n";
+ m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
+ } else if (key.vs.as_ls) {
+ sh->shader.vs_as_ls = true;
+ sfn_log << SfnLog::trans << "Start VS for TCS\n";
+ m_export_processor.reset(new VertexStageExportForES(*this));
+ } else {
+ sfn_log << SfnLog::trans << "Start VS for FS\n";
+ m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
+ }
}
bool VertexShaderFromNir::do_process_inputs(nir_variable *input)
m_vertex_id.reset(R0x);
inject_register(0, 0, m_vertex_id, false);
+ if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
+ /* The primitive ID lives in register 0, channel 2: mark that register
+ * value itself as an input before injecting it. */
+ auto R0z = new GPRValue(0,2);
+ R0z->set_as_input();
+ m_primitive_id.reset(R0z);
+ inject_register(0, 2, m_primitive_id, false);
+ }
+
if (m_sv_values.test(es_instanceid)) {
auto R0w = new GPRValue(0,3);
R0w->set_as_input();
inject_register(0, 3, m_instance_id, false);
}
- priority_queue<int, std::vector<int>, std::greater<int>> q;
- for (auto a: m_param_map) {
- q.push(a.first);
- }
- int next_param = 0;
- while (!q.empty()) {
- int loc = q.top();
- q.pop();
- m_param_map[loc] = next_param++;
+ if (m_sv_values.test(es_rel_patch_id)) {
+ auto R0y = new GPRValue(0,1);
+ R0y->set_as_input();
+ m_rel_vertex_id.reset(R0y);
+ inject_register(0, 1, m_rel_vertex_id, false);
}
+
return true;
}
+/* Start-of-shader hook: forwards to the export processor so that it can
+ * set up its parameter map. */
+void VertexShaderFromNir::emit_shader_start()
+{
+ m_export_processor->setup_paramn_map();
+}
+
bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
{
switch (instr->type) {
}
}
-bool VertexShaderFromNir::do_process_outputs(nir_variable *output)
+/* Emit an LDS write for a store intrinsic: src[1] supplies the address and
+ * src[0] the data. Depending on the intrinsic's write mask either one or two
+ * source channels are written. Always returns true. */
+bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
{
- if (output->data.location == VARYING_SLOT_COL0 ||
- output->data.location == VARYING_SLOT_COL1 ||
- (output->data.location >= VARYING_SLOT_VAR0 &&
- output->data.location <= VARYING_SLOT_VAR31) ||
- (output->data.location >= VARYING_SLOT_TEX0 &&
- output->data.location <= VARYING_SLOT_TEX7) ||
- output->data.location == VARYING_SLOT_BFC0 ||
- output->data.location == VARYING_SLOT_BFC1 ||
- output->data.location == VARYING_SLOT_CLIP_VERTEX ||
- output->data.location == VARYING_SLOT_CLIP_DIST0 ||
- output->data.location == VARYING_SLOT_CLIP_DIST1 ||
- output->data.location == VARYING_SLOT_POS ||
- output->data.location == VARYING_SLOT_PSIZ ||
- output->data.location == VARYING_SLOT_FOGC ||
- output->data.location == VARYING_SLOT_LAYER ||
- output->data.location == VARYING_SLOT_EDGE ||
- output->data.location == VARYING_SLOT_VIEWPORT
- ) {
+ unsigned write_mask = nir_intrinsic_write_mask(instr);
- r600_shader_io& io = sh_info().output[output->data.driver_location];
- tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>( output->data.location),
- true, &io.name, &io.sid);
- if (! m_key.vs.as_es)
- evaluate_spi_sid(io);
- ++sh_info().noutput;
+ auto address = from_nir(instr->src[1], 0);
+ /* Read from channels 0/1 when either of the two low mask bits is set,
+ * otherwise from channels 2/3. Folding mask bits 2/3 onto bits 0/1 lets
+ * the test on bit 1 below cover both channel pairs. */
+ int swizzle_base = (write_mask & 0x3) ? 0 : 2;
+ write_mask |= write_mask >> 2;
- if (output->data.location == VARYING_SLOT_PSIZ ||
- output->data.location == VARYING_SLOT_EDGE ||
- output->data.location == VARYING_SLOT_LAYER)
- m_cur_clip_pos = 2;
+ auto value = from_nir(instr->src[0], swizzle_base);
+ /* Bit 1 clear: single-value write; bit 1 set: write two values. */
+ if (!(write_mask & 2)) {
+ emit_instruction(new LDSWriteInstruction(address, 1, value));
+ } else {
+ auto value1 = from_nir(instr->src[0], swizzle_base + 1);
+ emit_instruction(new LDSWriteInstruction(address, 1, value, value1));
+ }
- if (output->data.location != VARYING_SLOT_POS &&
- output->data.location != VARYING_SLOT_EDGE &&
- output->data.location != VARYING_SLOT_PSIZ &&
- output->data.location != VARYING_SLOT_CLIP_VERTEX)
- m_param_map[output->data.location] = m_cur_param++;
+ return true;
+}
- return true;
- }
- return false;
+/* Output declarations are handled by the export processor; its result is
+ * returned unchanged. */
+bool VertexShaderFromNir::do_process_outputs(nir_variable *output)
+{
+ return m_export_processor->do_process_outputs(output);
}
bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
return false;
}
-bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
-{
- sh_info().cc_dist_mask = 0xff;
- sh_info().clip_dist_write = 0xff;
-
- std::unique_ptr<GPRVector> clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
-
- for (int i = 0; i < 4; ++i)
- sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
-
- GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()};
-
- for (int i = 0; i < 8; i++) {
- int oreg = i >> 2;
- int ochan = i & 3;
- AluInstruction *ir = nullptr;
- for (int j = 0; j < 4; j++) {
- ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
- PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
- (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
- emit_instruction(ir);
- }
- ir->set_flag(alu_last_instr);
- }
-
- m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
- emit_export_instruction(m_last_pos_export);
-
- m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
- emit_export_instruction(m_last_pos_export);
-
- return true;
-}
-
-bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
- std::array<uint32_t, 4> *swizzle_override)
-{
- std::array<uint32_t,4> swizzle;
- uint32_t write_mask = 0;
-
- if (swizzle_override) {
- swizzle = *swizzle_override;
- for (int i = 0; i < 4; ++i) {
- if (swizzle[i] < 6)
- write_mask |= 1 << i;
- }
- } else {
- write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
- for (int i = 0; i < 4; ++i)
- swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
- }
-
- sh_info().output[out_var->data.driver_location].write_mask = write_mask;
-
- GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
- set_output(out_var->data.driver_location, PValue(value));
-
- int export_slot = 0;
-
- switch (out_var->data.location) {
- case VARYING_SLOT_EDGE: {
- sh_info().vs_out_misc_write = 1;
- sh_info().vs_out_edgeflag = 1;
- emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
- emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
- sh_info().output[out_var->data.driver_location].write_mask = 0xf;
- }
- /* fallthrough */
- case VARYING_SLOT_PSIZ:
- case VARYING_SLOT_LAYER:
- export_slot = 1;
- break;
- case VARYING_SLOT_POS:
- break;
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- export_slot = m_cur_clip_pos++;
- break;
- default:
- sfn_log << SfnLog::err << __func__ << "Unsupported location "
- << out_var->data.location << "\n";
- return false;
- }
-
- m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
- emit_export_instruction(m_last_pos_export);
- add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
- return true;
-}
-
-bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
-{
- assert(out_var->data.driver_location < sh_info().noutput);
- sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
-
- int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
- std::array<uint32_t,4> swizzle;
- for (int i = 0; i < 4; ++i)
- swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
-
- sh_info().output[out_var->data.driver_location].write_mask = write_mask;
-
- GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
- sh_info().output[out_var->data.driver_location].gpr = value->sel();
-
- /* This should use the registers!! */
- set_output(out_var->data.driver_location, PValue(value));
-
- auto param_loc = m_param_map.find(out_var->data.location);
- assert(param_loc != m_param_map.end());
-
- m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
- emit_export_instruction(m_last_param_export);
- add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
- return true;
-}
-
-bool VertexShaderFromNir::emit_stream(int stream)
-{
- assert(m_so_info);
- if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
- R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
- return false;
- }
- for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
- if (m_so_info->output[i].output_buffer >= 4) {
- R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
- m_so_info->output[i].output_buffer);
- return false;
- }
- }
- const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
- unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
- std::vector<GPRVector> tmp(m_so_info->num_outputs);
-
- /* Initialize locations where the outputs are stored. */
- for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
- if (stream != -1 && stream != m_so_info->output[i].stream)
- continue;
-
- sfn_log << SfnLog::instr << "Emit stream " << i
- << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
-
-
- so_gpr[i] = output_register(m_so_info->output[i].register_index);
-
- if (!so_gpr[i]) {
- sfn_log << SfnLog::err << "\nERR: register index "
- << m_so_info->output[i].register_index
- << " doesn't correspond to an output register\n";
- return false;
- }
- start_comp[i] = m_so_info->output[i].start_component;
- /* Lower outputs with dst_offset < start_component.
- *
- * We can only output 4D vectors with a write mask, e.g. we can
- * only output the W component at offset 3, etc. If we want
- * to store Y, Z, or W at buffer offset 0, we need to use MOV
- * to move it to X and output X. */
- if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
- int tmp_index = allocate_temp_register();
- int sc = m_so_info->output[i].start_component;
- AluInstruction *alu = nullptr;
- for (int j = 0; j < m_so_info->output[i].num_components; j++) {
- PValue dst(new GPRValue(tmp_index, j));
- alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
- tmp[i].set_reg_i(j, dst);
- emit_instruction(alu);
- }
- if (alu)
- alu->set_flag(alu_last_instr);
-
- /* Fill the vector with masked values */
- PValue dst_blank(new GPRValue(tmp_index, 7));
- for (int j = m_so_info->output[i].num_components; j < 4; j++)
- tmp[i].set_reg_i(j, dst_blank);
-
- start_comp[i] = 0;
- so_gpr[i] = &tmp[i];
- }
- sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
- }
-
- /* Write outputs to buffers. */
- for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
- sfn_log << SfnLog::instr << "Write output buffer " << i
- << " with register index " << m_so_info->output[i].register_index << "\n";
-
- StreamOutIntruction *out_stream =
- new StreamOutIntruction(*so_gpr[i],
- m_so_info->output[i].num_components,
- m_so_info->output[i].dst_offset - start_comp[i],
- ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
- m_so_info->output[i].output_buffer,
- m_so_info->output[i].stream);
- emit_export_instruction(out_stream);
- m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
- }
- return true;
-}
-
void VertexShaderFromNir::do_finalize()
{
- if (m_key.vs.as_gs_a) {
- PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
- GPRVector primid({PValue(new GPRValue(0,2)), o,o,o});
- m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
- emit_export_instruction(m_last_param_export);
- int i;
- i = sh_info().noutput++;
- auto& io = sh_info().output[i];
- io.name = TGSI_SEMANTIC_PRIMID;
- io.sid = 0;
- io.gpr = 0;
- io.interpolate = TGSI_INTERPOLATE_CONSTANT;
- io.write_mask = 0x4;
- io.spi_sid = m_key.vs.prim_id_out;
- sh_info().vs_as_gs_a = 1;
- }
-
- finalize_exports();
-}
-
-
-bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
-{
-
- switch (out_var->data.location) {
- case VARYING_SLOT_PSIZ:
- sh_info().vs_out_point_size = 1;
- sh_info().vs_out_misc_write = 1;
- /* fallthrough */
- case VARYING_SLOT_POS:
- return emit_varying_pos(out_var, instr);
- case VARYING_SLOT_EDGE: {
- std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7};
- return emit_varying_pos(out_var, instr, &swizzle_override);
- }
- case VARYING_SLOT_CLIP_VERTEX:
- return emit_clip_vertices(out_var, instr);
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- m_num_clip_dist += 4;
- return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr);
- case VARYING_SLOT_LAYER: {
- sh_info().vs_out_misc_write = 1;
- sh_info().vs_out_layer = 1;
- std::array<uint32_t, 4> swz = {7,7,0,7};
- return emit_varying_pos(out_var, instr, &swz) &&
- emit_varying_param(out_var, instr);
- }
- case VARYING_SLOT_VIEW_INDEX:
- return emit_varying_pos(out_var, instr) &&
- emit_varying_param(out_var, instr);
-
- default:
- if (out_var->data.location <= VARYING_SLOT_VAR31 ||
- (out_var->data.location >= VARYING_SLOT_TEX0 &&
- out_var->data.location <= VARYING_SLOT_TEX7))
- return emit_varying_param(out_var, instr);
- }
-
- fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
- out_var->data.location);
- return false;
-}
-
-void VertexShaderFromNirForFS::finalize_exports()
-{
- if (m_so_info && m_so_info->num_outputs)
- emit_stream(-1);
-
- m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
-
- if (!m_last_param_export) {
- GPRVector value(0,{7,7,7,7});
- m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
- emit_export_instruction(m_last_param_export);
- }
- m_last_param_export->set_last();
-
- if (!m_last_pos_export) {
- GPRVector value(0,{7,7,7,7});
- m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
- emit_export_instruction(m_last_pos_export);
- }
- m_last_pos_export->set_last();
-
-}
-
-VertexShaderFromNirForGS::VertexShaderFromNirForGS(r600_pipe_shader *sh,
- r600_pipe_shader_selector& sel,
- const r600_shader_key &key,
- const r600_shader *gs_shader):
- VertexShaderFromNir(sh, sel, key),
- m_gs_shader(gs_shader)
-{
- sh->shader.vs_as_es = true;
-}
-
-bool VertexShaderFromNirForGS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
-{
-
- assert(m_gs_shader);
-
- int ring_offset = -1;
- const r600_shader_io& out_io = sh_info().output[out_var->data.driver_location];
-
- sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
- << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
- for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
- auto& in_io = m_gs_shader->input[k];
- sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
-
- if (in_io.name == out_io.name &&
- in_io.sid == out_io.sid) {
- ring_offset = in_io.ring_offset;
- break;
- }
- }
-
- if (out_var->data.location == VARYING_SLOT_VIEWPORT)
- return true;
-
- if (ring_offset == -1) {
- sfn_log << SfnLog::err << "VS defines output at "
- << out_var->data.driver_location << "name=" << out_io.name
- << " sid=" << out_io.sid << " that is not consumed as GS input\n";
- return true;
- }
-
- uint32_t write_mask = (1 << instr->num_components) - 1;
-
- std::unique_ptr<GPRVector> value(vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
- swizzle_from_mask(instr->num_components)));
-
- auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
- ring_offset >> 2, 4, PValue());
- emit_export_instruction(ir);
-
- sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
- if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
- out_var->data.location == VARYING_SLOT_CLIP_DIST1)
- m_num_clip_dist += 4;
-
- return true;
-}
-
-void VertexShaderFromNirForGS::finalize_exports()
-{
-}
-
-
-VertexShaderFromNirForES::VertexShaderFromNirForES(r600_pipe_shader *sh,
- UNUSED const pipe_stream_output_info *so_info,
- r600_pipe_shader_selector& sel,
- const r600_shader_key &key):
- VertexShaderFromNir(sh, sel, key)
-{
-}
-
-bool VertexShaderFromNirForES::do_emit_store_deref(UNUSED const nir_variable *out_var,
- UNUSED nir_intrinsic_instr* instr)
-{
- return false;
+ m_export_processor->finalize_exports();
}
-void VertexShaderFromNirForES::finalize_exports()
+/* Stores to output variables are forwarded to the export processor; its
+ * result is returned unchanged. */
+bool VertexShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
{
+ return m_export_processor->store_deref(out_var, instr);
}
}
#define sfn_vertex_shader_from_nir_h
#include "sfn_shader_base.h"
+#include "sfn_vertexstageexport.h"
namespace r600 {
+/* Vertex shader translator. Output declaration, output stores and export
+ * finalization are delegated to m_export_processor. */
-class VertexShaderFromNir : public ShaderFromNirProcessor {
+class VertexShaderFromNir : public VertexStage {
public:
VertexShaderFromNir(r600_pipe_shader *sh,
r600_pipe_shader_selector &sel,
- const r600_shader_key &key);
+ const r600_shader_key &key, r600_shader *gs_shader);
bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
bool scan_sysvalue_access(nir_instr *instr) override;
+
+ /* Value holding the primitive ID (may be unset). */
+ PValue primitive_id() override {return m_primitive_id;}
protected:
- bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
- std::array<uint32_t, 4> *swizzle_override = nullptr);
- bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
- bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
- bool emit_stream(int stream);
// todo: encapsulate
unsigned m_num_clip_dist;
unsigned m_enabled_stream_buffers_mask;
const pipe_stream_output_info *m_so_info;
void do_finalize() override;
+
+ std::map<unsigned, unsigned> m_param_map;
private:
+ bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+ /* NOTE(review): no definition of this member is visible in this change;
+ * do_finalize() calls m_export_processor->finalize_exports() instead.
+ * Confirm whether this declaration is still needed. */
+ void finalize_exports();
+ void emit_shader_start() override;
bool do_process_inputs(nir_variable *input) override;
bool allocate_reserved_registers() override;
bool do_process_outputs(nir_variable *output) override;
bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
-
- virtual void finalize_exports() = 0;
-
- unsigned m_cur_param;
- std::map<unsigned, unsigned> m_param_map;
- unsigned m_cur_clip_pos;
+ bool emit_store_local_shared(nir_intrinsic_instr* instr);
PValue m_vertex_id;
PValue m_instance_id;
+ PValue m_rel_vertex_id;
+ PValue m_primitive_id;
r600_shader_key m_key;
-};
-class VertexShaderFromNirForFS : public VertexShaderFromNir {
-public:
- using VertexShaderFromNir::VertexShaderFromNir;
-
- bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
-private:
- void finalize_exports() override;
-};
-
-class VertexShaderFromNirForGS : public VertexShaderFromNir {
-public:
- VertexShaderFromNirForGS(r600_pipe_shader *sh, r600_pipe_shader_selector &sel,
- const r600_shader_key &key, const r600_shader *gs_shader);
- bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
- void finalize_exports() override;
-
- const r600_shader *m_gs_shader;
-};
-
-class VertexShaderFromNirForES : public VertexShaderFromNir {
-public:
- VertexShaderFromNirForES(r600_pipe_shader *sh, const pipe_stream_output_info *so_info, r600_pipe_shader_selector &sel,
- const r600_shader_key &key);
- bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
- void finalize_exports() override;
+ /* Stage-specific export handling, created in the constructor. */
+ std::unique_ptr<VertexStageExportBase> m_export_processor;
};
}
--- /dev/null
+#include "sfn_vertexstageexport.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+namespace r600 {
+
+using std::priority_queue;
+
+/* Remember the owning vertex stage processor; the clip position counter
+ * starts at 1 and the parameter counter at 0. */
+VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
+   m_proc(proc),
+   m_cur_clip_pos(1),
+   m_cur_param(0)
+{
+}
+
+/* Nothing to release here; defined out of line in this translation unit. */
+VertexStageExportBase::~VertexStageExportBase() = default;
+
+/* Set up the FS export handler: no export has been emitted yet (both
+ * last-export pointers are null), no clip distances have been counted and
+ * no stream-out buffer is enabled. The stream-out info, pipe shader and
+ * shader key are stored for later use. */
+VertexStageExportForFS::VertexStageExportForFS(
+      VertexStage& proc,
+      const pipe_stream_output_info *so_info,
+      r600_pipe_shader *pipe_shader,
+      const r600_shader_key &key):
+   VertexStageExportBase(proc),
+   m_last_param_export(nullptr),
+   m_last_pos_export(nullptr),
+   m_num_clip_dist(0),
+   m_enabled_stream_buffers_mask(0),
+   m_so_info(so_info),
+   m_pipe_shader(pipe_shader),
+   m_key(key)
+{
+}
+
+/* Renumber the parameter exports: every location recorded in m_param_map
+ * gets a consecutive index starting at 0, handed out in ascending order of
+ * the location value. */
+void VertexStageExportBase::setup_paramn_map()
+{
+   /* Min-heap, so the smallest location is always served first. */
+   priority_queue<int, std::vector<int>, std::greater<int>> pending;
+   for (const auto& entry : m_param_map)
+      pending.push(entry.first);
+
+   for (int slot = 0; !pending.empty(); ++slot) {
+      const int location = pending.top();
+      pending.pop();
+      m_param_map[location] = slot;
+   }
+}
+
+/* Register one shader output variable.
+ *
+ * Returns false when the variable's location is not one of the handled
+ * varying slots. Otherwise the output's semantic name/sid, SPI sid and write
+ * mask are filled in, the output count is bumped and, for everything that is
+ * not exported as position only, a parameter slot is reserved. */
+bool VertexStageExportBase::do_process_outputs(nir_variable *output)
+{
+   const auto location = output->data.location;
+
+   const bool is_generic = location >= VARYING_SLOT_VAR0 &&
+                           location <= VARYING_SLOT_VAR31;
+   const bool is_texcoord = location >= VARYING_SLOT_TEX0 &&
+                            location <= VARYING_SLOT_TEX7;
+
+   bool is_named_slot = false;
+   switch (location) {
+   case VARYING_SLOT_COL0:
+   case VARYING_SLOT_COL1:
+   case VARYING_SLOT_BFC0:
+   case VARYING_SLOT_BFC1:
+   case VARYING_SLOT_CLIP_VERTEX:
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CLIP_DIST1:
+   case VARYING_SLOT_POS:
+   case VARYING_SLOT_PSIZ:
+   case VARYING_SLOT_FOGC:
+   case VARYING_SLOT_LAYER:
+   case VARYING_SLOT_EDGE:
+   case VARYING_SLOT_VIEWPORT:
+      is_named_slot = true;
+      break;
+   default:
+      break;
+   }
+
+   if (!is_generic && !is_texcoord && !is_named_slot)
+      return false;
+
+   r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
+   tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(location),
+                                true, &io.name, &io.sid);
+
+   m_proc.evaluate_spi_sid(io);
+   io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
+                   << output->data.location_frac;
+   ++m_proc.sh_info().noutput;
+
+   /* Point size, edge flag and layer make the clip position counter start
+    * at 2 instead of 1. */
+   switch (location) {
+   case VARYING_SLOT_PSIZ:
+   case VARYING_SLOT_EDGE:
+   case VARYING_SLOT_LAYER:
+      m_cur_clip_pos = 2;
+      break;
+   default:
+      break;
+   }
+
+   const bool no_param_slot = location == VARYING_SLOT_POS ||
+                              location == VARYING_SLOT_EDGE ||
+                              location == VARYING_SLOT_PSIZ ||
+                              location == VARYING_SLOT_CLIP_VERTEX;
+   if (!no_param_slot)
+      m_param_map[location] = m_cur_param++;
+
+   return true;
+}
+
+
+/* Dispatch a store to an output variable to the matching export emitter,
+ * based on the variable's location. Returns the emitter's result, or false
+ * (after printing a message to stderr) when the location is not handled. */
+bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   auto& info = m_proc.sh_info();
+   const auto location = out_var->data.location;
+
+   switch (location) {
+   case VARYING_SLOT_POS:
+      return emit_varying_pos(out_var, instr);
+
+   case VARYING_SLOT_PSIZ:
+      info.vs_out_point_size = 1;
+      info.vs_out_misc_write = 1;
+      return emit_varying_pos(out_var, instr);
+
+   case VARYING_SLOT_EDGE: {
+      std::array<uint32_t, 4> edge_swizzle = {7 ,0, 7, 7};
+      return emit_varying_pos(out_var, instr, &edge_swizzle);
+   }
+
+   case VARYING_SLOT_CLIP_VERTEX:
+      return emit_clip_vertices(out_var, instr);
+
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CLIP_DIST1:
+      m_num_clip_dist += 4;
+      /* Clip distances go out as parameter first, then as position. */
+      return emit_varying_param(out_var, instr) &&
+             emit_varying_pos(out_var, instr);
+
+   case VARYING_SLOT_LAYER: {
+      info.vs_out_misc_write = 1;
+      info.vs_out_layer = 1;
+      std::array<uint32_t, 4> layer_swizzle = {7,7,0,7};
+      return emit_varying_pos(out_var, instr, &layer_swizzle) &&
+             emit_varying_param(out_var, instr);
+   }
+
+   case VARYING_SLOT_VIEW_INDEX:
+      return emit_varying_pos(out_var, instr) &&
+             emit_varying_param(out_var, instr);
+
+   default:
+      break;
+   }
+
+   /* Every remaining location up to VAR31, and the TEX0..TEX7 range, is a
+    * plain parameter export. */
+   const bool is_plain_param = location <= VARYING_SLOT_VAR31 ||
+                               (location >= VARYING_SLOT_TEX0 &&
+                                location <= VARYING_SLOT_TEX7);
+   if (is_plain_param)
+      return emit_varying_param(out_var, instr);
+
+   fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
+           out_var->data.location);
+   return false;
+}
+
+/* Emit a position-type export for one output variable.
+ *
+ * With swizzle_override the given swizzle is used and every lane whose
+ * swizzle is below 6 counts as written; without it, write mask and swizzle
+ * are derived from the intrinsic's write mask shifted by location_frac.
+ * POS uses export slot 0, EDGE/PSIZ/LAYER slot 1 and CLIP_DIST0/1 take the
+ * next m_cur_clip_pos slot. Returns false for any other location. */
+bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override)
+{
+ std::array<uint32_t,4> swizzle;
+ uint32_t write_mask = 0;
+
+ if (swizzle_override) {
+ swizzle = *swizzle_override;
+ for (int i = 0; i < 4; ++i) {
+ if (swizzle[i] < 6)
+ write_mask |= 1 << i;
+ }
+ } else {
+ write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+ /* Lanes that are not written get swizzle 7 */
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+ }
+
+ m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+
+ GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
+ m_proc.set_output(out_var->data.driver_location, PValue(value));
+
+ int export_slot = 0;
+
+ switch (out_var->data.location) {
+ case VARYING_SLOT_EDGE: {
+ m_proc.sh_info().vs_out_misc_write = 1;
+ m_proc.sh_info().vs_out_edgeflag = 1;
+ /* Channel 1 is clamped by a MOV and then converted from float to int */
+ m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
+ m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr});
+ m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf;
+ }
+ /* fallthrough */
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_LAYER:
+ export_slot = 1;
+ break;
+ case VARYING_SLOT_POS:
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ export_slot = m_cur_clip_pos++;
+ break;
+ default:
+ sfn_log << SfnLog::err << __func__ << "Unsupported location "
+ << out_var->data.location << "\n";
+ return false;
+ }
+
+ /* Remember the export so that finalize_exports() can flag the last one */
+ m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+ m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
+ return true;
+}
+
+/* Emit a parameter-type export for one output variable.
+ *
+ * Write mask and swizzle are derived from the intrinsic's write mask shifted
+ * by location_frac. The parameter index is looked up in m_param_map by the
+ * variable's location, which is asserted to be present. Always returns
+ * true. */
+bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+ assert(out_var->data.driver_location < m_proc.sh_info().noutput);
+ sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
+
+ int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+ std::array<uint32_t,4> swizzle;
+ /* Lanes that are not written get swizzle 7 */
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+
+ m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+
+ GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
+ m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
+
+ /* This should use the registers!! */
+ m_proc.set_output(out_var->data.driver_location, PValue(value));
+
+ auto param_loc = m_param_map.find(out_var->data.location);
+ assert(param_loc != m_param_map.end());
+
+ /* Remember the export so that finalize_exports() can flag the last one */
+ m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
+ m_proc.emit_export_instruction(m_last_param_export);
+ m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
+ return true;
+}
+
+/* Handle a store to the clip vertex output.
+ *
+ * Sets cc_dist_mask and clip_dist_write to 0xff, computes eight values as
+ * four-component dot products of the stored vector with uniform values
+ * 512..519 of R600_BUFFER_INFO_CONST_BUFFER, and emits them as two
+ * position-type exports. Always returns true. */
+bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+ m_proc.sh_info().cc_dist_mask = 0xff;
+ m_proc.sh_info().clip_dist_write = 0xff;
+
+ std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
+
+ for (int i = 0; i < 4; ++i)
+ m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 1 << i;
+
+ GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
+
+ /* Result i goes to channel (i & 3) of temporary vector (i >> 2) */
+ for (int i = 0; i < 8; i++) {
+ int oreg = i >> 2;
+ int ochan = i & 3;
+ AluInstruction *ir = nullptr;
+ /* Four dot4 slots per result; only the slot for ochan has the write flag */
+ for (int j = 0; j < 4; j++) {
+ ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
+ PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
+ (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
+ m_proc.emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ }
+
+ /* The two vectors use consecutive position export slots */
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+
+ return true;
+}
+
+/* Finish the export sequence of the shader.
+ *
+ * When the key requests it (vs.as_gs_a) an extra parameter export carrying
+ * the primitive ID is appended and described as a new output. Stream output
+ * is emitted if any is configured. Finally the last parameter export and the
+ * last position export are flagged as last; if none was emitted so far, a
+ * fully masked one is created first. */
+void VertexStageExportForFS::finalize_exports()
+{
+   auto& info = m_proc.sh_info();
+
+   if (m_key.vs.as_gs_a) {
+      PValue zero(new GPRValue(0,PIPE_SWIZZLE_0));
+      GPRVector primid({m_proc.primitive_id(), zero, zero, zero});
+      m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
+      m_proc.emit_export_instruction(m_last_param_export);
+
+      /* Describe the extra output in the next free output slot. */
+      const int slot = info.noutput++;
+      auto& primid_io = info.output[slot];
+      primid_io.name = TGSI_SEMANTIC_PRIMID;
+      primid_io.sid = 0;
+      primid_io.gpr = 0;
+      primid_io.interpolate = TGSI_INTERPOLATE_CONSTANT;
+      primid_io.write_mask = 0x1;
+      primid_io.spi_sid = m_key.vs.prim_id_out;
+      info.vs_as_gs_a = 1;
+   }
+
+   const bool has_stream_out = m_so_info && m_so_info->num_outputs;
+   if (has_stream_out)
+      emit_stream(-1);
+
+   m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
+
+   if (m_last_param_export == nullptr) {
+      GPRVector masked(0,{7,7,7,7});
+      m_last_param_export = new ExportInstruction(0, masked, ExportInstruction::et_param);
+      m_proc.emit_export_instruction(m_last_param_export);
+   }
+   m_last_param_export->set_last();
+
+   if (m_last_pos_export == nullptr) {
+      GPRVector masked(0,{7,7,7,7});
+      m_last_pos_export = new ExportInstruction(0, masked, ExportInstruction::et_pos);
+      m_proc.emit_export_instruction(m_last_pos_export);
+   }
+   m_last_pos_export->set_last();
+}
+
+/* Emit the stream-output writes described by m_so_info.
+ *
+ * stream: index of the stream whose outputs are emitted; -1 emits the
+ *         outputs of every stream.
+ *
+ * Returns false (after logging) when there are more than PIPE_MAX_SO_OUTPUTS
+ * outputs, when an output targets a buffer index >= 4, or when an output's
+ * register index has no corresponding output register; true otherwise.
+ */
+bool VertexStageExportForFS::emit_stream(int stream)
+{
+   assert(m_so_info);
+   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+      R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+      return false;
+   }
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (m_so_info->output[i].output_buffer >= 4) {
+         R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+                  m_so_info->output[i].output_buffer);
+         return false;
+      }
+   }
+   const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
+   /* Backing storage for outputs that have to be repacked into a temporary;
+    * so_gpr[i] may point into this vector, so it must not be resized. */
+   std::vector<GPRVector> tmp(m_so_info->num_outputs);
+
+   /* Initialize locations where the outputs are stored. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Emit stream " << i
+              << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
+
+
+      so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
+
+      if (!so_gpr[i]) {
+         sfn_log << SfnLog::err << "\nERR: register index "
+                 << m_so_info->output[i].register_index
+                 << " doesn't correspond to an output register\n";
+         return false;
+      }
+      start_comp[i] = m_so_info->output[i].start_component;
+      /* Lower outputs with dst_offset < start_component.
+       *
+       * We can only output 4D vectors with a write mask, e.g. we can
+       * only output the W component at offset 3, etc. If we want
+       * to store Y, Z, or W at buffer offset 0, we need to use MOV
+       * to move it to X and output X. */
+      if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
+         int tmp_index = m_proc.allocate_temp_register();
+         int sc = m_so_info->output[i].start_component;
+         AluInstruction *alu = nullptr;
+         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+            PValue dst(new GPRValue(tmp_index, j));
+            alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
+            tmp[i].set_reg_i(j, dst);
+            m_proc.emit_instruction(alu);
+         }
+         /* Only the final MOV of the group carries the last-instruction flag. */
+         if (alu)
+            alu->set_flag(alu_last_instr);
+
+         /* Fill the vector with masked values */
+         PValue dst_blank(new GPRValue(tmp_index, 7));
+         for (int j = m_so_info->output[i].num_components; j < 4; j++)
+            tmp[i].set_reg_i(j, dst_blank);
+
+         start_comp[i] = 0;
+         so_gpr[i] = &tmp[i];
+      }
+      sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
+   }
+
+   /* Write outputs to buffers. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      /* Outputs belonging to other streams were skipped in the setup loop
+       * above, so so_gpr[i] and start_comp[i] were never initialized for
+       * them; they must be skipped here as well. */
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Write output buffer " << i
+              << " with register index " << m_so_info->output[i].register_index << "\n";
+
+      StreamOutIntruction *out_stream =
+            new StreamOutIntruction(*so_gpr[i],
+                                    m_so_info->output[i].num_components,
+                                    m_so_info->output[i].dst_offset - start_comp[i],
+                                    ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+                                    m_so_info->output[i].output_buffer,
+                                    m_so_info->output[i].stream);
+      m_proc.emit_export_instruction(out_stream);
+      /* One group of four buffer bits per stream. */
+      m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+   }
+   return true;
+}
+
+
+/* Create the export handler that matches vertex shader outputs against the
+ * inputs of the given geometry shader.
+ *
+ * proc:      vertex stage processor the exports are emitted through
+ * gs_shader: geometry shader whose input table is searched in store_deref();
+ *            only the pointer is stored
+ */
+VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
+                                               const r600_shader *gs_shader):
+   VertexStageExportBase(proc),
+   /* store_deref() accumulates into this counter with +=, so it has to
+    * start from a defined value. */
+   m_num_clip_dist(0),
+   m_gs_shader(gs_shader)
+{
+}
+
+/* Write one vertex shader output to the ring slot of the geometry shader
+ * input that has the same name and sid.
+ *
+ * VARYING_SLOT_VIEWPORT outputs and outputs without a matching geometry
+ * shader input are not written (the latter is logged as an error).
+ * Always returns true.
+ */
+bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   const r600_shader_io& vs_out = m_proc.sh_info().output[out_var->data.driver_location];
+
+   sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
+           << " name=" << vs_out.name<< " sid=" << vs_out.sid << "\n";
+
+   /* Find the first geometry shader input with matching name and sid. */
+   int ring_offset = -1;
+   for (unsigned idx = 0; idx < m_gs_shader->ninput; ++idx) {
+      auto& gs_in = m_gs_shader->input[idx];
+      sfn_log << SfnLog::io << "  against " <<  idx << " name=" << gs_in.name<< " sid=" << gs_in.sid << "\n";
+
+      const bool same_semantic = gs_in.name == vs_out.name && gs_in.sid == vs_out.sid;
+      if (same_semantic) {
+         ring_offset = gs_in.ring_offset;
+         break;
+      }
+   }
+
+   if (out_var->data.location == VARYING_SLOT_VIEWPORT)
+      return true;
+
+   if (ring_offset == -1) {
+      sfn_log << SfnLog::err << "VS defines output at "
+              << out_var->data.driver_location << "name=" << vs_out.name
+              << " sid=" << vs_out.sid << " that is not consumed as GS input\n";
+      return true;
+   }
+
+   /* One mask bit per stored component, starting at bit 0. */
+   uint32_t comp_mask = (1 << instr->num_components) - 1;
+
+   /* The fetched vector is released when src_vec goes out of scope. */
+   std::unique_ptr<GPRVector> src_vec(
+         m_proc.vec_from_nir_with_fetch_constant(instr->src[1], comp_mask,
+                                                 swizzle_from_mask(instr->num_components)));
+
+   m_proc.emit_export_instruction(
+         new MemRingOutIntruction(cf_mem_ring, mem_write, *src_vec,
+                                  ring_offset >> 2, 4, PValue()));
+
+   m_proc.sh_info().output[out_var->data.driver_location].write_mask |= comp_mask;
+
+   /* Each stored clip-distance output adds 4 to the counter. */
+   switch (out_var->data.location) {
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CLIP_DIST1:
+      m_num_clip_dist += 4;
+      break;
+   default:
+      break;
+   }
+
+   return true;
+}
+
+/* This export variant emits nothing when the exports are finalized. */
+void VertexStageExportForGS::finalize_exports()
+{
+   /* intentionally empty */
+}
+
+/* Create the ES export handler; all state lives in the base class. */
+VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
+   VertexStageExportBase(proc)
+{
+   /* nothing else to set up */
+}
+
+/* Store hook of the ES export variant: emits nothing and reports success.
+ * Both arguments are ignored. */
+bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   (void)out_var;
+   (void)instr;
+   return true;
+}
+
+/* This export variant emits nothing when the exports are finalized. */
+void VertexStageExportForES::finalize_exports()
+{
+   /* intentionally empty */
+}
+
+}
--- /dev/null
+#ifndef VERTEXSTAGEEXPORT_H
+#define VERTEXSTAGEEXPORT_H
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+/* Shader processor interface seen by the vertex stage export handlers:
+ * a ShaderFromNirProcessor that can additionally provide the primitive ID. */
+class VertexStage : public ShaderFromNirProcessor {
+public:
+ /* Reuse the base class constructors unchanged. */
+ using ShaderFromNirProcessor::ShaderFromNirProcessor;
+
+ /* Value holding the primitive ID; VertexStageExportForFS exports it as a
+  * parameter when the shader key has vs.as_gs_a set. */
+ virtual PValue primitive_id() = 0;
+};
+
+/* Abstract base of the vertex stage export handlers. A handler receives the
+ * output stores of the shader through store_deref() and emits whatever is
+ * still pending in finalize_exports(). */
+class VertexStageExportBase
+{
+public:
+ VertexStageExportBase(VertexStage& proc);
+ virtual ~VertexStageExportBase();
+ /* NOTE(review): presumably fills m_param_map; the definition is not part
+  * of this header - confirm against the implementation. */
+ void setup_paramn_map();
+ /* Handle the store of one output variable. */
+ virtual bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
+ /* Called to emit the exports that remain after all stores were handled. */
+ virtual void finalize_exports() = 0;
+ virtual bool do_process_outputs(nir_variable *output);
+ /* Current value of the parameter export index. */
+ int cur_param() const {return m_cur_param;}
+protected:
+ /* Processor the instructions are emitted through; held by reference. */
+ VertexStage& m_proc;
+ std::map<unsigned, unsigned> m_param_map;
+ /* Index passed to position (et_pos) exports for clip distances. */
+ int m_cur_clip_pos;
+ /* Index passed to parameter (et_param) exports. */
+ int m_cur_param;
+};
+
+/* Export handler that emits position and parameter exports and the
+ * stream-output writes of the vertex stage. */
+class VertexStageExportForFS : public VertexStageExportBase
+{
+public:
+ /* so_info, pipe_shader and key are stored as pointer/reference only, so
+  * they have to outlive this object. */
+ VertexStageExportForFS(VertexStage& proc,
+ const pipe_stream_output_info *so_info,
+ r600_pipe_shader *pipe_shader,
+ const r600_shader_key& key);
+
+ bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+ /* Emits the optional primitive-ID parameter, the stream outputs and any
+  * missing placeholder export, then marks the last param/pos exports. */
+ void finalize_exports() override;
+
+private:
+ bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
+ bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override = nullptr);
+ bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
+ /* Emit the stream-output writes of one stream; -1 selects all streams. */
+ bool emit_stream(int stream);
+
+ /* Most recently emitted export of each kind; finalize_exports() calls
+  * set_last() on both. */
+ ExportInstruction *m_last_param_export;
+ ExportInstruction *m_last_pos_export;
+
+ int m_num_clip_dist;
+ /* Accumulated in emit_stream(): bit (stream * 4 + output_buffer) is set
+  * for every buffer written; copied to the pipe shader on finalize. */
+ int m_enabled_stream_buffers_mask;
+ /* Stream-output description; may be null (finalize_exports checks it). */
+ const pipe_stream_output_info *m_so_info;
+ r600_pipe_shader *m_pipe_shader;
+ const r600_shader_key& m_key;
+
+};
+
+/* Export handler that writes vertex shader outputs to the ring at the
+ * offsets of the matching geometry shader inputs. */
+class VertexStageExportForGS : public VertexStageExportBase
+{
+public:
+ /* gs_shader is stored as a pointer only and has to outlive this object. */
+ VertexStageExportForGS(VertexStage& proc,
+ const r600_shader *gs_shader);
+ /* Writes the output to the ring slot of the geometry shader input with
+  * the same name and sid; unmatched outputs are logged and skipped. */
+ bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+ /* Nothing is emitted on finalize for this variant. */
+ void finalize_exports() override;
+
+private:
+ /* Grows by 4 for every CLIP_DIST0/CLIP_DIST1 output that is stored. */
+ unsigned m_num_clip_dist;
+ /* Geometry shader whose input table is searched in store_deref(). */
+ const r600_shader *m_gs_shader;
+};
+
+/* Export handler whose hooks are no-ops in the accompanying implementation:
+ * store_deref() only returns true and finalize_exports() is empty. */
+class VertexStageExportForES : public VertexStageExportBase
+{
+public:
+ VertexStageExportForES(VertexStage& proc);
+ bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+ void finalize_exports() override;
+};
+
+
+}
+
+#endif // VERTEXSTAGEEXPORT_H