From: Timur Kristóf Date: Fri, 28 Feb 2020 15:20:01 +0000 (+0100) Subject: aco: Refactor VS output stores in preparation for tessellation. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=db93af5f1b7a04fba0899e45b64204766c6ec4aa;p=mesa.git aco: Refactor VS output stores in preparation for tessellation. This commit takes the new helpers into use by the VS output store function. This function is also where the VS outputs will be handled when the VS runs on the HW LS stage. Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index d535b864138..1d54e3e4051 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3150,81 +3150,39 @@ std::pair<Temp, unsigned> get_intrinsic_io_basic_offset(isel_context *ctx, nir_i return get_intrinsic_io_basic_offset(ctx, instr, stride, stride); } -void visit_store_vsgs_output(isel_context *ctx, nir_intrinsic_instr *instr) +void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr) { - unsigned write_mask = nir_intrinsic_write_mask(instr); - unsigned component = nir_intrinsic_component(instr); - Temp src = get_ssa_temp(ctx, instr->src[0].ssa); - unsigned idx = (nir_intrinsic_base(instr) + component) * 4u; - Operand offset(s1); Builder bld(ctx->program, ctx->block); - nir_instr *off_instr = instr->src[1].ssa->parent_instr; - if (off_instr->type != nir_instr_type_load_const) - offset = bld.v_mul24_imm(bld.def(v1), get_ssa_temp(ctx, instr->src[1].ssa), 16u); - else - idx += nir_instr_as_load_const(off_instr)->value[0].u32 * 16u; - + std::pair<Temp, unsigned> offs = get_intrinsic_io_basic_offset(ctx, instr, 4u); + Temp src = get_ssa_temp(ctx, instr->src[0].ssa); + unsigned write_mask = nir_intrinsic_write_mask(instr); unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8u; + if (ctx->stage == vertex_es) { + /* GFX6-8: ES stage is not merged into GS, data is
passed from ES to GS in VMEM. */ Temp esgs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_VS * 16u)); + Temp es2gs_offset = get_arg(ctx, ctx->args->es2gs_offset); + store_vmem_mubuf(ctx, src, esgs_ring, offs.first, es2gs_offset, offs.second, elem_size_bytes, write_mask, false, true, true); + } else { + Temp lds_base; - Temp elems[NIR_MAX_VEC_COMPONENTS * 2]; - if (elem_size_bytes == 8) { - for (unsigned i = 0; i < src.size() / 2; i++) { - Temp elem = emit_extract_vector(ctx, src, i, v2); - elems[i*2] = bld.tmp(v1); - elems[i*2+1] = bld.tmp(v1); - bld.pseudo(aco_opcode::p_split_vector, Definition(elems[i*2]), Definition(elems[i*2+1]), elem); - } - write_mask = widen_mask(write_mask, 2); - elem_size_bytes /= 2u; + if (ctx->stage == vertex_geometry_gs) { + /* GFX9+: ES stage is merged into GS, data is passed between them using LDS. */ + unsigned itemsize = ctx->program->info->vs.es_info.esgs_itemsize; + Temp thread_id = emit_mbcnt(ctx, bld.def(v1)); + Temp wave_idx = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info), Operand(4u << 16 | 24)); + Temp vertex_idx = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), thread_id, + bld.v_mul24_imm(bld.def(v1), as_vgpr(ctx, wave_idx), ctx->program->wave_size)); + lds_base = bld.v_mul24_imm(bld.def(v1), vertex_idx, itemsize); } else { - for (unsigned i = 0; i < src.size(); i++) - elems[i] = emit_extract_vector(ctx, src, i, v1); + unreachable("Invalid LS or ES stage"); } - while (write_mask) { - unsigned index = u_bit_scan(&write_mask); - unsigned offset = index * elem_size_bytes; - Temp elem = emit_extract_vector(ctx, src, index, RegClass(RegType::vgpr, elem_size_bytes / 4)); - - Operand vaddr_offset(v1); - unsigned const_offset = idx + offset; - if (const_offset >= 4096u) { - vaddr_offset = bld.copy(bld.def(v1), Operand(const_offset / 4096u * 4096u)); - const_offset %= 4096u; - } - - aco_ptr<MTBUF_instruction>
mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)}; - mtbuf->operands[0] = Operand(esgs_ring); - mtbuf->operands[1] = vaddr_offset; - mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->es2gs_offset)); - mtbuf->operands[3] = Operand(elem); - mtbuf->offen = !vaddr_offset.isUndefined(); - mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32; - mtbuf->nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - mtbuf->offset = const_offset; - mtbuf->glc = true; - mtbuf->slc = true; - mtbuf->barrier = barrier_none; - mtbuf->can_reorder = true; - bld.insert(std::move(mtbuf)); - } - } else { - unsigned itemsize = ctx->program->info->vs.es_info.esgs_itemsize; - - Temp vertex_idx = emit_mbcnt(ctx, bld.def(v1)); - Temp wave_idx = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info), Operand(4u << 16 | 24)); - vertex_idx = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), vertex_idx, - bld.v_mul24_imm(bld.def(v1), as_vgpr(ctx, wave_idx), ctx->program->wave_size)); - - Temp lds_base = bld.v_mul24_imm(bld.def(v1), vertex_idx, itemsize); - if (!offset.isUndefined()) - lds_base = bld.vadd32(bld.def(v1), offset, lds_base); + offs = offset_add(ctx, offs, std::make_pair(lds_base, 0u)); + unsigned lds_align = calculate_lds_alignment(ctx, offs.second); + store_lds(ctx, elem_size_bytes, src, write_mask, offs.first, offs.second, lds_align); - unsigned align = calculate_lds_alignment(ctx, idx); - store_lds(ctx, elem_size_bytes, src, write_mask, lds_base, idx, align); } } @@ -3258,7 +3216,7 @@ void visit_store_output(isel_context *ctx, nir_intrinsic_instr *instr) } } else if (ctx->stage == vertex_es || (ctx->stage == vertex_geometry_gs && ctx->shader->info.stage == MESA_SHADER_VERTEX)) { - visit_store_vsgs_output(ctx, instr); + visit_store_ls_or_es_output(ctx, instr); } else { unreachable("Shader stage not implemented"); }