From: Gert Wollny Date: Wed, 15 Apr 2020 14:40:49 +0000 (+0200) Subject: r600/sfn: extract class to handle the VS export to different stages X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f7df2c57a207a386ba0d2130541ac9d0546670e1;p=mesa.git r600/sfn: extract class to handle the VS export to different stages This code can be shared with the TESS_EVAL shader Signed-off-by: Gert Wollny Part-of: --- diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 673b8a0ac4d..45342e4ad21 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -152,7 +152,9 @@ CXX_SOURCES = \ sfn/sfn_value_gpr.cpp \ sfn/sfn_value_gpr.h \ sfn/sfn_valuepool.cpp \ - sfn/sfn_valuepool.h + sfn/sfn_valuepool.h \ + sfn/sfn_vertexstageexport.cpp \ + sfn/sfn_vertexstageexport.h R600_GENERATED_FILES = \ egd_tables.h diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index fe369a99dab..468a8165dd5 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -170,6 +170,8 @@ files_r600 = files( 'sfn/sfn_value_gpr.h', 'sfn/sfn_valuepool.cpp', 'sfn/sfn_valuepool.h', + 'sfn/sfn_vertexstageexport.cpp', + 'sfn/sfn_vertexstageexport.h', ) egd_tables_h = custom_target( diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 36ea68753e9..e9a598703f3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -60,16 +60,8 @@ bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shade switch (shader->info.stage) { case MESA_SHADER_VERTEX: - if (key.vs.as_es) { - sfn_log << SfnLog::trans << "Start VS for GS\n"; - impl.reset(new VertexShaderFromNirForGS(pipe_shader, *sel, key, gs_shader)); - } else if (key.vs.as_ls) { - sfn_log << "VS: next type TCS and TES not yet supported\n"; - return false; - } else { - sfn_log << 
SfnLog::trans << "Start VS for FS\n"; - impl.reset(new VertexShaderFromNirForFS(pipe_shader, *sel, key)); - } + impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader)); + break; break; case MESA_SHADER_GEOMETRY: sfn_log << SfnLog::trans << "Start GS\n"; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/src/gallium/drivers/r600/sfn/sfn_shader_base.h index c747cc6db9a..70062db6258 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_base.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.h @@ -105,11 +105,10 @@ protected: bool emit_store_local_shared(nir_intrinsic_instr* instr); bool emit_barrier(nir_intrinsic_instr* instr); - const GPRVector *output_register(unsigned location) const; bool load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last = true); - void add_param_output_reg(int loc, const GPRVector *gpr); + void inc_atomic_file_count(); std::bitset<8> m_sv_values; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp index 67eb357a134..ba8a583d780 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp @@ -38,17 +38,16 @@ using std::priority_queue; VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, - const r600_shader_key& key): - ShaderFromNirProcessor (PIPE_SHADER_VERTEX, sel, sh->shader, - sh->scratch_space_needed), + const r600_shader_key& key, + struct r600_shader* gs_shader): + VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader, + sh->scratch_space_needed), m_num_clip_dist(0), m_last_param_export(nullptr), m_last_pos_export(nullptr), m_pipe_shader(sh), m_enabled_stream_buffers_mask(0), m_so_info(&sel.so), - m_cur_param(0), - m_cur_clip_pos(1), m_vertex_id(), m_key(key) { @@ -56,6 +55,18 @@ VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh, increment_reserved_registers(); sh_info().atomic_base = key.vs.first_atomic_counter; + 
sh_info().vs_as_gs_a = m_key.vs.as_gs_a; + + if (key.vs.as_es) { + sh->shader.vs_as_es = true; + m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader)); + } else if (key.vs.as_ls) { + sh->shader.vs_as_ls = true; + sfn_log << SfnLog::trans << "Start VS for GS\n"; + m_export_processor.reset(new VertexStageExportForES(*this)); + } else { + m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key)); + } } bool VertexShaderFromNir::do_process_inputs(nir_variable *input) @@ -80,6 +91,13 @@ bool VertexShaderFromNir::allocate_reserved_registers() m_vertex_id.reset(R0x); inject_register(0, 0, m_vertex_id, false); + if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) { + auto R0z = new GPRValue(0,2); + R0x->set_as_input(); + m_primitive_id.reset(R0z); + inject_register(0, 2, m_primitive_id, false); + } + if (m_sv_values.test(es_instanceid)) { auto R0w = new GPRValue(0,3); R0w->set_as_input(); @@ -87,20 +105,22 @@ bool VertexShaderFromNir::allocate_reserved_registers() inject_register(0, 3, m_instance_id, false); } - priority_queue, std::greater> q; - for (auto a: m_param_map) { - q.push(a.first); - } - int next_param = 0; - while (!q.empty()) { - int loc = q.top(); - q.pop(); - m_param_map[loc] = next_param++; + if (m_sv_values.test(es_rel_patch_id)) { + auto R0y = new GPRValue(0,1); + R0y->set_as_input(); + m_rel_vertex_id.reset(R0y); + inject_register(0, 1, m_rel_vertex_id, false); } + return true; } +void VertexShaderFromNir::emit_shader_start() +{ + m_export_processor->setup_paramn_map(); +} + bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr) { switch (instr->type) { @@ -135,48 +155,28 @@ bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_inst } } -bool VertexShaderFromNir::do_process_outputs(nir_variable *output) +bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr) { - if (output->data.location == VARYING_SLOT_COL0 || - output->data.location == VARYING_SLOT_COL1 
|| - (output->data.location >= VARYING_SLOT_VAR0 && - output->data.location <= VARYING_SLOT_VAR31) || - (output->data.location >= VARYING_SLOT_TEX0 && - output->data.location <= VARYING_SLOT_TEX7) || - output->data.location == VARYING_SLOT_BFC0 || - output->data.location == VARYING_SLOT_BFC1 || - output->data.location == VARYING_SLOT_CLIP_VERTEX || - output->data.location == VARYING_SLOT_CLIP_DIST0 || - output->data.location == VARYING_SLOT_CLIP_DIST1 || - output->data.location == VARYING_SLOT_POS || - output->data.location == VARYING_SLOT_PSIZ || - output->data.location == VARYING_SLOT_FOGC || - output->data.location == VARYING_SLOT_LAYER || - output->data.location == VARYING_SLOT_EDGE || - output->data.location == VARYING_SLOT_VIEWPORT - ) { + unsigned write_mask = nir_intrinsic_write_mask(instr); - r600_shader_io& io = sh_info().output[output->data.driver_location]; - tgsi_get_gl_varying_semantic(static_cast( output->data.location), - true, &io.name, &io.sid); - if (! m_key.vs.as_es) - evaluate_spi_sid(io); - ++sh_info().noutput; + auto address = from_nir(instr->src[1], 0); + int swizzle_base = (write_mask & 0x3) ? 
0 : 2; + write_mask |= write_mask >> 2; - if (output->data.location == VARYING_SLOT_PSIZ || - output->data.location == VARYING_SLOT_EDGE || - output->data.location == VARYING_SLOT_LAYER) - m_cur_clip_pos = 2; + auto value = from_nir(instr->src[0], swizzle_base); + if (!(write_mask & 2)) { + emit_instruction(new LDSWriteInstruction(address, 1, value)); + } else { + auto value1 = from_nir(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSWriteInstruction(address, 1, value, value1)); + } - if (output->data.location != VARYING_SLOT_POS && - output->data.location != VARYING_SLOT_EDGE && - output->data.location != VARYING_SLOT_PSIZ && - output->data.location != VARYING_SLOT_CLIP_VERTEX) - m_param_map[output->data.location] = m_cur_param++; + return true; +} - return true; - } - return false; +bool VertexShaderFromNir::do_process_outputs(nir_variable *output) +{ + return m_export_processor->do_process_outputs(output); } bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) @@ -199,375 +199,14 @@ bool VertexShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_int return false; } -bool VertexShaderFromNir::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr) -{ - sh_info().cc_dist_mask = 0xff; - sh_info().clip_dist_write = 0xff; - - std::unique_ptr clip_vertex(vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3})); - - for (int i = 0; i < 4; ++i) - sh_info().output[out_var->data.driver_location].write_mask |= 1 << i; - - GPRVector clip_dist[2] = { get_temp_vec4(), get_temp_vec4()}; - - for (int i = 0; i < 8; i++) { - int oreg = i >> 2; - int ochan = i & 3; - AluInstruction *ir = nullptr; - for (int j = 0; j < 4; j++) { - ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j), - PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), - (j == ochan) ? 
EmitInstruction::write : EmitInstruction::empty); - emit_instruction(ir); - } - ir->set_flag(alu_last_instr); - } - - m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); - emit_export_instruction(m_last_pos_export); - - m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); - emit_export_instruction(m_last_pos_export); - - return true; -} - -bool VertexShaderFromNir::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, - std::array *swizzle_override) -{ - std::array swizzle; - uint32_t write_mask = 0; - - if (swizzle_override) { - swizzle = *swizzle_override; - for (int i = 0; i < 4; ++i) { - if (swizzle[i] < 6) - write_mask |= 1 << i; - } - } else { - write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; - for (int i = 0; i < 4; ++i) - swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7; - } - - sh_info().output[out_var->data.driver_location].write_mask = write_mask; - - GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); - set_output(out_var->data.driver_location, PValue(value)); - - int export_slot = 0; - - switch (out_var->data.location) { - case VARYING_SLOT_EDGE: { - sh_info().vs_out_misc_write = 1; - sh_info().vs_out_edgeflag = 1; - emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); - emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr}); - sh_info().output[out_var->data.driver_location].write_mask = 0xf; - } - /* fallthrough */ - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_LAYER: - export_slot = 1; - break; - case VARYING_SLOT_POS: - break; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - export_slot = m_cur_clip_pos++; - break; - default: - sfn_log << SfnLog::err << __func__ << "Unsupported location " - << out_var->data.location << "\n"; - return 
false; - } - - m_last_pos_export = new ExportInstruction(export_slot, *value, ExportInstruction::et_pos); - emit_export_instruction(m_last_pos_export); - add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr()); - return true; -} - -bool VertexShaderFromNir::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr) -{ - assert(out_var->data.driver_location < sh_info().noutput); - sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n"; - - int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; - std::array swizzle; - for (int i = 0; i < 4; ++i) - swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7; - - sh_info().output[out_var->data.driver_location].write_mask = write_mask; - - GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); - sh_info().output[out_var->data.driver_location].gpr = value->sel(); - - /* This should use the registers!! 
*/ - set_output(out_var->data.driver_location, PValue(value)); - - auto param_loc = m_param_map.find(out_var->data.location); - assert(param_loc != m_param_map.end()); - - m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param); - emit_export_instruction(m_last_param_export); - add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr()); - return true; -} - -bool VertexShaderFromNir::emit_stream(int stream) -{ - assert(m_so_info); - if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) { - R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); - return false; - } - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - if (m_so_info->output[i].output_buffer >= 4) { - R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", - m_so_info->output[i].output_buffer); - return false; - } - } - const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; - unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; - std::vector tmp(m_so_info->num_outputs); - - /* Initialize locations where the outputs are stored. */ - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - if (stream != -1 && stream != m_so_info->output[i].stream) - continue; - - sfn_log << SfnLog::instr << "Emit stream " << i - << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; - - - so_gpr[i] = output_register(m_so_info->output[i].register_index); - - if (!so_gpr[i]) { - sfn_log << SfnLog::err << "\nERR: register index " - << m_so_info->output[i].register_index - << " doesn't correspond to an output register\n"; - return false; - } - start_comp[i] = m_so_info->output[i].start_component; - /* Lower outputs with dst_offset < start_component. - * - * We can only output 4D vectors with a write mask, e.g. we can - * only output the W component at offset 3, etc. If we want - * to store Y, Z, or W at buffer offset 0, we need to use MOV - * to move it to X and output X. 
*/ - if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { - int tmp_index = allocate_temp_register(); - int sc = m_so_info->output[i].start_component; - AluInstruction *alu = nullptr; - for (int j = 0; j < m_so_info->output[i].num_components; j++) { - PValue dst(new GPRValue(tmp_index, j)); - alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write}); - tmp[i].set_reg_i(j, dst); - emit_instruction(alu); - } - if (alu) - alu->set_flag(alu_last_instr); - - /* Fill the vector with masked values */ - PValue dst_blank(new GPRValue(tmp_index, 7)); - for (int j = m_so_info->output[i].num_components; j < 4; j++) - tmp[i].set_reg_i(j, dst_blank); - - start_comp[i] = 0; - so_gpr[i] = &tmp[i]; - } - sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; - } - - /* Write outputs to buffers. */ - for (unsigned i = 0; i < m_so_info->num_outputs; i++) { - sfn_log << SfnLog::instr << "Write output buffer " << i - << " with register index " << m_so_info->output[i].register_index << "\n"; - - StreamOutIntruction *out_stream = - new StreamOutIntruction(*so_gpr[i], - m_so_info->output[i].num_components, - m_so_info->output[i].dst_offset - start_comp[i], - ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], - m_so_info->output[i].output_buffer, - m_so_info->output[i].stream); - emit_export_instruction(out_stream); - m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; - } - return true; -} - void VertexShaderFromNir::do_finalize() { - if (m_key.vs.as_gs_a) { - PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); - GPRVector primid({PValue(new GPRValue(0,2)), o,o,o}); - m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param); - emit_export_instruction(m_last_param_export); - int i; - i = sh_info().noutput++; - auto& io = sh_info().output[i]; - io.name = TGSI_SEMANTIC_PRIMID; - io.sid = 0; - io.gpr = 0; - io.interpolate = TGSI_INTERPOLATE_CONSTANT; - 
io.write_mask = 0x4; - io.spi_sid = m_key.vs.prim_id_out; - sh_info().vs_as_gs_a = 1; - } - - finalize_exports(); -} - - -bool VertexShaderFromNirForFS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) -{ - - switch (out_var->data.location) { - case VARYING_SLOT_PSIZ: - sh_info().vs_out_point_size = 1; - sh_info().vs_out_misc_write = 1; - /* fallthrough */ - case VARYING_SLOT_POS: - return emit_varying_pos(out_var, instr); - case VARYING_SLOT_EDGE: { - std::array swizzle_override = {7 ,0, 7, 7}; - return emit_varying_pos(out_var, instr, &swizzle_override); - } - case VARYING_SLOT_CLIP_VERTEX: - return emit_clip_vertices(out_var, instr); - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - m_num_clip_dist += 4; - return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr); - case VARYING_SLOT_LAYER: { - sh_info().vs_out_misc_write = 1; - sh_info().vs_out_layer = 1; - std::array swz = {7,7,0,7}; - return emit_varying_pos(out_var, instr, &swz) && - emit_varying_param(out_var, instr); - } - case VARYING_SLOT_VIEW_INDEX: - return emit_varying_pos(out_var, instr) && - emit_varying_param(out_var, instr); - - default: - if (out_var->data.location <= VARYING_SLOT_VAR31 || - (out_var->data.location >= VARYING_SLOT_TEX0 && - out_var->data.location <= VARYING_SLOT_TEX7)) - return emit_varying_param(out_var, instr); - } - - fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", - out_var->data.location); - return false; -} - -void VertexShaderFromNirForFS::finalize_exports() -{ - if (m_so_info && m_so_info->num_outputs) - emit_stream(-1); - - m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; - - if (!m_last_param_export) { - GPRVector value(0,{7,7,7,7}); - m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); - emit_export_instruction(m_last_param_export); - } - m_last_param_export->set_last(); - - if (!m_last_pos_export) { - GPRVector value(0,{7,7,7,7}); 
- m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); - emit_export_instruction(m_last_pos_export); - } - m_last_pos_export->set_last(); - -} - -VertexShaderFromNirForGS::VertexShaderFromNirForGS(r600_pipe_shader *sh, - r600_pipe_shader_selector& sel, - const r600_shader_key &key, - const r600_shader *gs_shader): - VertexShaderFromNir(sh, sel, key), - m_gs_shader(gs_shader) -{ - sh->shader.vs_as_es = true; -} - -bool VertexShaderFromNirForGS::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) -{ - - assert(m_gs_shader); - - int ring_offset = -1; - const r600_shader_io& out_io = sh_info().output[out_var->data.driver_location]; - - sfn_log << SfnLog::io << "check output " << out_var->data.driver_location - << " name=" << out_io.name<< " sid=" << out_io.sid << "\n"; - for (unsigned k = 0; k < m_gs_shader->ninput; ++k) { - auto& in_io = m_gs_shader->input[k]; - sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n"; - - if (in_io.name == out_io.name && - in_io.sid == out_io.sid) { - ring_offset = in_io.ring_offset; - break; - } - } - - if (out_var->data.location == VARYING_SLOT_VIEWPORT) - return true; - - if (ring_offset == -1) { - sfn_log << SfnLog::err << "VS defines output at " - << out_var->data.driver_location << "name=" << out_io.name - << " sid=" << out_io.sid << " that is not consumed as GS input\n"; - return true; - } - - uint32_t write_mask = (1 << instr->num_components) - 1; - - std::unique_ptr value(vec_from_nir_with_fetch_constant(instr->src[1], write_mask, - swizzle_from_mask(instr->num_components))); - - auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value, - ring_offset >> 2, 4, PValue()); - emit_export_instruction(ir); - - sh_info().output[out_var->data.driver_location].write_mask |= write_mask; - if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 || - out_var->data.location == VARYING_SLOT_CLIP_DIST1) - m_num_clip_dist += 4; - - return 
true; -} - -void VertexShaderFromNirForGS::finalize_exports() -{ -} - - -VertexShaderFromNirForES::VertexShaderFromNirForES(r600_pipe_shader *sh, - UNUSED const pipe_stream_output_info *so_info, - r600_pipe_shader_selector& sel, - const r600_shader_key &key): - VertexShaderFromNir(sh, sel, key) -{ -} - -bool VertexShaderFromNirForES::do_emit_store_deref(UNUSED const nir_variable *out_var, - UNUSED nir_intrinsic_instr* instr) -{ - return false; + m_export_processor->finalize_exports(); } -void VertexShaderFromNirForES::finalize_exports() +bool VertexShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) { + return m_export_processor->store_deref(out_var, instr); } } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h index fe8e6b1cbd5..f1c38452374 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h @@ -28,23 +28,21 @@ #define sfn_vertex_shader_from_nir_h #include "sfn_shader_base.h" +#include "sfn_vertexstageexport.h" namespace r600 { -class VertexShaderFromNir : public ShaderFromNirProcessor { +class VertexShaderFromNir : public VertexStage { public: VertexShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector &sel, - const r600_shader_key &key); + const r600_shader_key &key, r600_shader *gs_shader); bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override; bool scan_sysvalue_access(nir_instr *instr) override; + + PValue primitive_id() override {return m_primitive_id;} protected: - bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, - std::array *swizzle_override = nullptr); - bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr); - bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr); - bool emit_stream(int stream); // todo: encapsulate unsigned m_num_clip_dist; @@ -54,49 +52,26 @@ 
protected: unsigned m_enabled_stream_buffers_mask; const pipe_stream_output_info *m_so_info; void do_finalize() override; + + std::map m_param_map; private: + bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; + void finalize_exports(); + void emit_shader_start() override; bool do_process_inputs(nir_variable *input) override; bool allocate_reserved_registers() override; bool do_process_outputs(nir_variable *output) override; bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; - - virtual void finalize_exports() = 0; - - unsigned m_cur_param; - std::map m_param_map; - unsigned m_cur_clip_pos; + bool emit_store_local_shared(nir_intrinsic_instr* instr); PValue m_vertex_id; PValue m_instance_id; + PValue m_rel_vertex_id; + PValue m_primitive_id; r600_shader_key m_key; -}; -class VertexShaderFromNirForFS : public VertexShaderFromNir { -public: - using VertexShaderFromNir::VertexShaderFromNir; - - bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; -private: - void finalize_exports() override; -}; - -class VertexShaderFromNirForGS : public VertexShaderFromNir { -public: - VertexShaderFromNirForGS(r600_pipe_shader *sh, r600_pipe_shader_selector &sel, - const r600_shader_key &key, const r600_shader *gs_shader); - bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; - void finalize_exports() override; - - const r600_shader *m_gs_shader; -}; - -class VertexShaderFromNirForES : public VertexShaderFromNir { -public: - VertexShaderFromNirForES(r600_pipe_shader *sh, const pipe_stream_output_info *so_info, r600_pipe_shader_selector &sel, - const r600_shader_key &key); - bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; - void finalize_exports() override; + std::unique_ptr m_export_processor; }; } diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp 
b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp new file mode 100644 index 00000000000..08c778a2e10 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp @@ -0,0 +1,456 @@ +#include "sfn_vertexstageexport.h" + +#include "tgsi/tgsi_from_mesa.h" + +namespace r600 { + +using std::priority_queue; + +VertexStageExportBase::VertexStageExportBase(VertexStage& proc): + m_proc(proc), + m_cur_clip_pos(1), + m_cur_param(0) +{ + +} + +VertexStageExportBase::~VertexStageExportBase() +{ + +} + +VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc, + const pipe_stream_output_info *so_info, + r600_pipe_shader *pipe_shader, const r600_shader_key &key): + VertexStageExportBase(proc), + m_last_param_export(nullptr), + m_last_pos_export(nullptr), + m_num_clip_dist(0), + m_enabled_stream_buffers_mask(0), + m_so_info(so_info), + m_pipe_shader(pipe_shader), + m_key(key) +{ +} + +void VertexStageExportBase::setup_paramn_map() +{ + priority_queue, std::greater> q; + for (auto a: m_param_map) { + q.push(a.first); + } + + int next_param = 0; + while (!q.empty()) { + int loc = q.top(); + q.pop(); + m_param_map[loc] = next_param++; + } +} + +bool VertexStageExportBase::do_process_outputs(nir_variable *output) +{ + if (output->data.location == VARYING_SLOT_COL0 || + output->data.location == VARYING_SLOT_COL1 || + (output->data.location >= VARYING_SLOT_VAR0 && + output->data.location <= VARYING_SLOT_VAR31) || + (output->data.location >= VARYING_SLOT_TEX0 && + output->data.location <= VARYING_SLOT_TEX7) || + output->data.location == VARYING_SLOT_BFC0 || + output->data.location == VARYING_SLOT_BFC1 || + output->data.location == VARYING_SLOT_CLIP_VERTEX || + output->data.location == VARYING_SLOT_CLIP_DIST0 || + output->data.location == VARYING_SLOT_CLIP_DIST1 || + output->data.location == VARYING_SLOT_POS || + output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_FOGC || + output->data.location == VARYING_SLOT_LAYER || + 
output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_VIEWPORT + ) { + + r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location]; + tgsi_get_gl_varying_semantic(static_cast( output->data.location), + true, &io.name, &io.sid); + + m_proc.evaluate_spi_sid(io); + io.write_mask = ((1 << glsl_get_components(output->type)) - 1) + << output->data.location_frac; + ++m_proc.sh_info().noutput; + + if (output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_LAYER) + m_cur_clip_pos = 2; + + if (output->data.location != VARYING_SLOT_POS && + output->data.location != VARYING_SLOT_EDGE && + output->data.location != VARYING_SLOT_PSIZ && + output->data.location != VARYING_SLOT_CLIP_VERTEX) + m_param_map[output->data.location] = m_cur_param++; + + return true; + } + return false; +} + + +bool VertexStageExportForFS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) +{ + + switch (out_var->data.location) { + case VARYING_SLOT_PSIZ: + m_proc.sh_info().vs_out_point_size = 1; + m_proc.sh_info().vs_out_misc_write = 1; + /* fallthrough */ + case VARYING_SLOT_POS: + return emit_varying_pos(out_var, instr); + case VARYING_SLOT_EDGE: { + std::array swizzle_override = {7 ,0, 7, 7}; + return emit_varying_pos(out_var, instr, &swizzle_override); + } + case VARYING_SLOT_CLIP_VERTEX: + return emit_clip_vertices(out_var, instr); + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_num_clip_dist += 4; + return emit_varying_param(out_var, instr) && emit_varying_pos(out_var, instr); + case VARYING_SLOT_LAYER: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_layer = 1; + std::array swz = {7,7,0,7}; + return emit_varying_pos(out_var, instr, &swz) && + emit_varying_param(out_var, instr); + } + case VARYING_SLOT_VIEW_INDEX: + return emit_varying_pos(out_var, instr) && + emit_varying_param(out_var, instr); + + default: + if 
(out_var->data.location <= VARYING_SLOT_VAR31 || + (out_var->data.location >= VARYING_SLOT_TEX0 && + out_var->data.location <= VARYING_SLOT_TEX7)) + return emit_varying_param(out_var, instr); + } + + fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", + out_var->data.location); + return false; +} + +bool VertexStageExportForFS::emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr, + std::array *swizzle_override) +{ + std::array swizzle; + uint32_t write_mask = 0; + + if (swizzle_override) { + swizzle = *swizzle_override; + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 6) + write_mask |= 1 << i; + } + } else { + write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7; + } + + m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask; + + GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle); + m_proc.set_output(out_var->data.driver_location, PValue(value)); + + int export_slot = 0; + + switch (out_var->data.location) { + case VARYING_SLOT_EDGE: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_edgeflag = 1; + m_proc.emit_instruction(op1_mov, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); + m_proc.emit_instruction(op1_flt_to_int, value->reg_i(1), {value->reg_i(1)}, {alu_write, alu_last_instr}); + m_proc.sh_info().output[out_var->data.driver_location].write_mask = 0xf; + } + /* fallthrough */ + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + export_slot = 1; + break; + case VARYING_SLOT_POS: + break; + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + export_slot = m_cur_clip_pos++; + break; + default: + sfn_log << SfnLog::err << __func__ << "Unsupported location " + << out_var->data.location << "\n"; + return false; + } + + m_last_pos_export = new 
ExportInstruction(export_slot, *value, ExportInstruction::et_pos);
+   m_proc.emit_export_instruction(m_last_pos_export);
+   m_proc.add_param_output_reg(out_var->data.driver_location, m_last_pos_export->gpr_ptr());
+   return true;
+}
+
+/* Export a generic varying as a parameter export.
+ *
+ * The value stored by instr is loaded into a GPR vector, its register and
+ * write mask are recorded for the output in the shader info, and the vector
+ * is exported to the slot m_param_map holds for the variable's location.
+ * Always returns true.
+ */
+bool VertexStageExportForFS::emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   assert(out_var->data.driver_location < m_proc.sh_info().noutput);
+   sfn_log << SfnLog::io << __func__ << ": emit DDL: " << out_var->data.driver_location << "\n";
+
+   int write_mask = nir_intrinsic_write_mask(instr) << out_var->data.location_frac;
+
+   /* Written channels read source component (channel - location_frac); all
+    * other channels get swizzle 7, the value this file also uses for masked
+    * channels. */
+   std::array<uint32_t, 4> swizzle;
+   for (int i = 0; i < 4; ++i)
+      swizzle[i] = ((1 << i) & write_mask) ? i - out_var->data.location_frac : 7;
+
+   m_proc.sh_info().output[out_var->data.driver_location].write_mask = write_mask;
+
+   GPRVector *value = m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask, swizzle);
+   m_proc.sh_info().output[out_var->data.driver_location].gpr = value->sel();
+
+   /* This should use the registers!! */
+   m_proc.set_output(out_var->data.driver_location, PValue(value));
+
+   auto param_loc = m_param_map.find(out_var->data.location);
+   assert(param_loc != m_param_map.end());
+
+   m_last_param_export = new ExportInstruction(param_loc->second, *value, ExportInstruction::et_param);
+   m_proc.emit_export_instruction(m_last_param_export);
+   m_proc.add_param_output_reg(out_var->data.driver_location, m_last_param_export->gpr_ptr());
+   return true;
+}
+
+/* Turn a stored clip vertex into eight clip distances.
+ *
+ * Both clip distance masks in the shader info are set to 0xff. Distance i is
+ * the dot4 product of the clip vertex with constant 512 + i of
+ * R600_BUFFER_INFO_CONST_BUFFER (presumably the user clip planes - TODO
+ * confirm). The results fill two temporary vec4s that are emitted as two
+ * consecutive position exports starting at m_cur_clip_pos.
+ * Always returns true.
+ */
+bool VertexStageExportForFS::emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   m_proc.sh_info().cc_dist_mask = 0xff;
+   m_proc.sh_info().clip_dist_write = 0xff;
+
+   std::unique_ptr<GPRVector> clip_vertex(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], 0xf, {0,1,2,3}));
+
+   /* All four components of the clip vertex output count as written. */
+   m_proc.sh_info().output[out_var->data.driver_location].write_mask |= 0xf;
+
+   GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
+
+   for (int i = 0; i < 8; i++) {
+      int oreg = i >> 2;
+      int ochan = i & 3;
+      AluInstruction *ir = nullptr;
+      /* One dot4 takes four ALU instructions; only the one in the target
+       * channel gets the write flag. */
+      for (int j = 0; j < 4; j++) {
+         ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), clip_vertex->reg_i(j),
+                                 PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
+                                 (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
+         m_proc.emit_instruction(ir);
+      }
+      ir->set_flag(alu_last_instr);
+   }
+
+   m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
+   m_proc.emit_export_instruction(m_last_pos_export);
+
+   m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
+   m_proc.emit_export_instruction(m_last_pos_export);
+
+   return true;
+}
+
+/* Emit everything that has to follow the regular output stores.
+ *
+ * - With key.vs.as_gs_a set, the primitive ID is exported as an extra
+ *   parameter and registered as a TGSI_SEMANTIC_PRIMID output.
+ * - Stream output is emitted when stream output info with outputs exists.
+ * - If no parameter or no position export was emitted so far, a dummy one
+ *   with all channels masked is added, so that set_last() can always be
+ *   applied to the final export of each type.
+ */
+void VertexStageExportForFS::finalize_exports()
+{
+   if (m_key.vs.as_gs_a) {
+      PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
+      GPRVector primid({m_proc.primitive_id(), o,o,o});
+      m_last_param_export = new ExportInstruction(m_cur_param, primid, ExportInstruction::et_param);
+      m_proc.emit_export_instruction(m_last_param_export);
+      const int i = m_proc.sh_info().noutput++;
+      auto& io = m_proc.sh_info().output[i];
+      io.name = TGSI_SEMANTIC_PRIMID;
+      io.sid = 0;
+      io.gpr = 0;
+      io.interpolate = TGSI_INTERPOLATE_CONSTANT;
+      io.write_mask = 0x1;
+      io.spi_sid = m_key.vs.prim_id_out;
+      m_proc.sh_info().vs_as_gs_a = 1;
+   }
+
+   /* NOTE(review): a failure of emit_stream() is only reported through its
+    * own error output; this function has no way to pass it on. */
+   if (m_so_info && m_so_info->num_outputs)
+      emit_stream(-1);
+
+   m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
+
+   if (!m_last_param_export) {
+      GPRVector value(0,{7,7,7,7});
+      m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
+      m_proc.emit_export_instruction(m_last_param_export);
+   }
+   m_last_param_export->set_last();
+
+   if (!m_last_pos_export) {
+      GPRVector value(0,{7,7,7,7});
+      m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
+      m_proc.emit_export_instruction(m_last_pos_export);
+   }
+   m_last_pos_export->set_last();
+}
+
+/* Emit the stream output writes described by m_so_info.
+ *
+ * stream selects the outputs to emit; -1 emits the outputs of all streams.
+ * Every buffer written is recorded in m_enabled_stream_buffers_mask, using
+ * four bits per stream.
+ *
+ * Returns false if there are more than PIPE_MAX_SO_OUTPUTS outputs, if an
+ * output refers to a buffer index >= 4, or if a register index does not
+ * correspond to an output register; true otherwise.
+ */
+bool VertexStageExportForFS::emit_stream(int stream)
+{
+   assert(m_so_info);
+   if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+      R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+      return false;
+   }
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (m_so_info->output[i].output_buffer >= 4) {
+         R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+                  m_so_info->output[i].output_buffer);
+         return false;
+      }
+   }
+   const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
+   /* tmp is sized once and never resized, so the pointers to its elements
+    * stored in so_gpr stay valid. */
+   std::vector<GPRVector> tmp(m_so_info->num_outputs);
+
+   /* Initialize locations where the outputs are stored. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Emit stream " << i
+              << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
+
+
+      so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
+
+      if (!so_gpr[i]) {
+         sfn_log << SfnLog::err << "\nERR: register index "
+                 << m_so_info->output[i].register_index
+                 << " doesn't correspond to an output register\n";
+         return false;
+      }
+      start_comp[i] = m_so_info->output[i].start_component;
+      /* Lower outputs with dst_offset < start_component.
+       *
+       * We can only output 4D vectors with a write mask, e.g. we can
+       * only output the W component at offset 3, etc. If we want
+       * to store Y, Z, or W at buffer offset 0, we need to use MOV
+       * to move it to X and output X. */
+      if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
+         int tmp_index = m_proc.allocate_temp_register();
+         int sc = m_so_info->output[i].start_component;
+         AluInstruction *alu = nullptr;
+         for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+            PValue dst(new GPRValue(tmp_index, j));
+            alu = new AluInstruction(op1_mov, dst, so_gpr[i]->reg_i(j + sc), {alu_write});
+            tmp[i].set_reg_i(j, dst);
+            m_proc.emit_instruction(alu);
+         }
+         if (alu)
+            alu->set_flag(alu_last_instr);
+
+         /* Fill the vector with masked values */
+         PValue dst_blank(new GPRValue(tmp_index, 7));
+         for (int j = m_so_info->output[i].num_components; j < 4; j++)
+            tmp[i].set_reg_i(j, dst_blank);
+
+         start_comp[i] = 0;
+         so_gpr[i] = &tmp[i];
+      }
+      sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
+   }
+
+   /* Write outputs to buffers. */
+   for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+      /* Outputs skipped by the loop above have no so_gpr/start_comp entry;
+       * apply the same filter here instead of reading uninitialised slots. */
+      if (stream != -1 && stream != m_so_info->output[i].stream)
+         continue;
+
+      sfn_log << SfnLog::instr << "Write output buffer " << i
+              << " with register index " << m_so_info->output[i].register_index << "\n";
+
+      StreamOutIntruction *out_stream =
+            new StreamOutIntruction(*so_gpr[i],
+                                    m_so_info->output[i].num_components,
+                                    m_so_info->output[i].dst_offset - start_comp[i],
+                                    ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+                                    m_so_info->output[i].output_buffer,
+                                    m_so_info->output[i].stream);
+      m_proc.emit_export_instruction(out_stream);
+      m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+   }
+   return true;
+}
+
+
+/* gs_shader supplies the input list that store_deref() matches outputs
+ * against. m_num_clip_dist starts at zero because store_deref() only ever
+ * adds to it. */
+VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
+                                               const r600_shader *gs_shader):
+   VertexStageExportBase(proc),
+   m_num_clip_dist(0),
+   m_gs_shader(gs_shader)
+{
+
+}
+
+/* Write one output to the memory ring.
+ *
+ * The output is matched by name and sid against the inputs of the geometry
+ * shader; the ring offset of the matching input selects where the value is
+ * written. VARYING_SLOT_VIEWPORT is skipped, and an output without matching
+ * input is logged and skipped. Always returns true.
+ */
+bool VertexStageExportForGS::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+
+   int ring_offset = -1;
+   const r600_shader_io& out_io = m_proc.sh_info().output[out_var->data.driver_location];
+
+   sfn_log << SfnLog::io << "check output " << out_var->data.driver_location
+           << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
+   for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
+      auto& in_io = m_gs_shader->input[k];
+      sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n";
+
+      if (in_io.name == out_io.name &&
+          in_io.sid == out_io.sid) {
+         ring_offset = in_io.ring_offset;
+         break;
+      }
+   }
+
+   if (out_var->data.location == VARYING_SLOT_VIEWPORT)
+      return true;
+
+   if (ring_offset == -1) {
+      sfn_log << SfnLog::err << "VS defines output at "
+              << out_var->data.driver_location << " name=" << out_io.name
+              << " sid=" << out_io.sid << " that is not consumed as GS input\n";
+      return true;
+   }
+
+   uint32_t write_mask = (1 << instr->num_components) - 1;
+
+   std::unique_ptr<GPRVector> value(m_proc.vec_from_nir_with_fetch_constant(instr->src[1], write_mask,
+                                    swizzle_from_mask(instr->num_components)));
+
+   auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, *value,
+                                      ring_offset >> 2, 4, PValue());
+   m_proc.emit_export_instruction(ir);
+
+   m_proc.sh_info().output[out_var->data.driver_location].write_mask |= write_mask;
+   if (out_var->data.location == VARYING_SLOT_CLIP_DIST0 ||
+       out_var->data.location == VARYING_SLOT_CLIP_DIST1)
+      m_num_clip_dist += 4;
+
+   return true;
+}
+
+/* Nothing has to be emitted after the ring writes. */
+void VertexStageExportForGS::finalize_exports()
+{
+
+}
+
+VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
+   VertexStageExportBase(proc)
+{
+}
+
+/* Accepts the store without emitting anything. */
+bool VertexStageExportForES::store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
+{
+   return true;
+}
+
+void VertexStageExportForES::finalize_exports()
+{
+
+}
+
+}
diff --git a/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
new file mode 100644
index 00000000000..7e2ca620be6
--- /dev/null
+++ b/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
@@ -0,0 +1,91 @@
+#ifndef VERTEXSTAGEEXPORT_H
+#define VERTEXSTAGEEXPORT_H
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+/* Shader processor that additionally has to provide a primitive ID value. */
+class VertexStage : public ShaderFromNirProcessor {
+public:
+   using ShaderFromNirProcessor::ShaderFromNirProcessor;
+
+   virtual PValue primitive_id() = 0;
+};
+
+/* Common interface and state of the per-consumer output export helpers. */
+class VertexStageExportBase
+{
+public:
+   VertexStageExportBase(VertexStage& proc);
+   virtual ~VertexStageExportBase();
+   void setup_paramn_map();
+   virtual bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) = 0;
+   virtual void finalize_exports() = 0;
+   virtual bool do_process_outputs(nir_variable *output);
+   int cur_param() const {return m_cur_param;}
+protected:
+   VertexStage& m_proc;
+   /* Parameter export slot, looked up by the output variable's location. */
+   std::map<unsigned, unsigned> m_param_map;
+   int m_cur_clip_pos;
+   int m_cur_param;
+};
+
+/* Emits position and parameter exports and the stream output writes. */
+class VertexStageExportForFS : public VertexStageExportBase
+{
+public:
+   VertexStageExportForFS(VertexStage& proc,
+                          const pipe_stream_output_info *so_info,
+                          r600_pipe_shader *pipe_shader,
+                          const r600_shader_key& key);
+
+   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+   void finalize_exports() override;
+
+private:
+   bool emit_varying_param(const nir_variable *out_var, nir_intrinsic_instr* instr);
+   bool emit_varying_pos(const nir_variable *out_var, nir_intrinsic_instr* instr,
+                         std::array<uint32_t, 4> *swizzle_override = nullptr);
+   bool emit_clip_vertices(const nir_variable *out_var, nir_intrinsic_instr* instr);
+   bool emit_stream(int stream);
+
+   ExportInstruction *m_last_param_export;
+   ExportInstruction *m_last_pos_export;
+
+   int m_num_clip_dist;
+   int m_enabled_stream_buffers_mask;
+   const pipe_stream_output_info *m_so_info;
+   r600_pipe_shader *m_pipe_shader;
+   const r600_shader_key& m_key;
+
+};
+
+/* Writes outputs to the memory ring at the offsets of the matching GS inputs. */
+class VertexStageExportForGS : public VertexStageExportBase
+{
+public:
+   VertexStageExportForGS(VertexStage& proc,
+                          const r600_shader *gs_shader);
+   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+   void finalize_exports() override;
+
+private:
+   unsigned m_num_clip_dist;
+   const r600_shader *m_gs_shader;
+};
+
+/* Currently a no-op: accepts every store and emits nothing. */
+class VertexStageExportForES : public VertexStageExportBase
+{
+public:
+   VertexStageExportForES(VertexStage& proc);
+   bool store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
+   void finalize_exports() override;
+};
+
+
+}
+
+#endif // VERTEXSTAGEEXPORT_H