From c79972b6047b1d49ec0c0652a2d7d716b1c2b457 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 30 Oct 2019 18:24:39 +0100 Subject: [PATCH] aco: always set scratch_offset in startpgm This patch also moves private_segment_buffer and scratch_offset to Program to easily access it. Reviewed-by: Rhys Perry --- .../compiler/aco_instruction_selection.cpp | 22 +++++++++---------- .../aco_instruction_selection_setup.cpp | 16 ++++++-------- src/amd/compiler/aco_ir.h | 7 +++--- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index e935dc11a9b..33242b7f6b7 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4922,9 +4922,9 @@ void visit_shared_atomic(isel_context *ctx, nir_intrinsic_instr *instr) Temp get_scratch_resource(isel_context *ctx) { Builder bld(ctx->program, ctx->block); - Temp scratch_addr = ctx->private_segment_buffer; + Temp scratch_addr = ctx->program->private_segment_buffer; if (ctx->stage != compute_cs) - scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), ctx->private_segment_buffer, Operand(0u)); + scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand(0u)); uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->options->wave_size == 64 ? 3 : 2);; @@ -4971,11 +4971,11 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { std::array elems; Temp lower = bld.mubuf(aco_opcode::buffer_load_dwordx4, bld.def(v4), offset, rsrc, - ctx->scratch_offset, 0, true); + ctx->program->scratch_offset, 0, true); Temp upper = bld.mubuf(dst.size() == 6 ? aco_opcode::buffer_load_dwordx2 : aco_opcode::buffer_load_dwordx4, dst.size() == 6 ? bld.def(v2) : bld.def(v4), - offset, rsrc, ctx->scratch_offset, 16, true); + offset, rsrc, ctx->program->scratch_offset, 16, true); emit_split_vector(ctx, lower, 2); elems[0] = emit_extract_vector(ctx, lower, 0, v2); elems[1] = emit_extract_vector(ctx, lower, 1, v2); @@ -5000,7 +5000,7 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { unreachable("Wrong dst size for nir_intrinsic_load_scratch"); } - bld.mubuf(op, Definition(dst), offset, rsrc, ctx->scratch_offset, 0, true); + bld.mubuf(op, Definition(dst), offset, rsrc, ctx->program->scratch_offset, 0, true); emit_split_vector(ctx, dst, instr->num_components); } @@ -5061,7 +5061,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { unreachable("Invalid data size for nir_intrinsic_store_scratch."); } - bld.mubuf(op, offset, rsrc, ctx->scratch_offset, write_data, start * elem_size_bytes, true); + bld.mubuf(op, offset, rsrc, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true); } } @@ -5292,6 +5292,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) Temp sample_pos; Temp addr = get_ssa_temp(ctx, instr->src[0].ssa); nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]); + Temp private_segment_buffer = ctx->program->private_segment_buffer; if (addr.type() == RegType::sgpr) { Operand offset; if (const_addr) { @@ -5303,17 +5304,16 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr, Operand(3u)); offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), addr, Operand(sample_pos_offset)); } - addr = ctx->private_segment_buffer; - sample_pos = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand(offset)); + sample_pos = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand(offset)); } else if (ctx->options->chip_class >= GFX9) { addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), addr); - sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr, ctx->private_segment_buffer, sample_pos_offset); + sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr, private_segment_buffer, sample_pos_offset); } else { - /* addr += ctx->private_segment_buffer + sample_pos_offset */ + /* addr += private_segment_buffer + sample_pos_offset */ Temp tmp0 = bld.tmp(s1); Temp tmp1 = bld.tmp(s1); - bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1), ctx->private_segment_buffer); + bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1), private_segment_buffer); Definition scc_tmp = bld.def(s1, scc); tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0, Operand(sample_pos_offset)); tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1, Operand(0u), bld.scc(scc_tmp.getTemp())); diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index b65628c8521..3ab8ebb19eb 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -97,8 +97,6 @@ struct isel_context { /* scratch */ bool scratch_enabled = false; - Temp private_segment_buffer = Temp(0, s2); /* also the part of the scratch descriptor on compute */ - Temp scratch_offset = Temp(0, s1); /* inputs common for merged stages */ Temp merged_wave_info = Temp(0, s1); @@ -929,7 +927,7 @@ void add_startpgm(struct isel_context *ctx) /* this needs to be in sgprs 0 and 1 */ if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) { - add_arg(&args, s2, &ctx->private_segment_buffer, 0); + add_arg(&args, s2, &ctx->program->private_segment_buffer, 0); set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx); } @@ -961,8 +959,8 @@ void add_startpgm(struct isel_context *ctx) else declare_streamout_sgprs(ctx, &args, &idx); - if (ctx->scratch_enabled) - add_arg(&args, s1, &ctx->scratch_offset, idx++); + if (ctx->options->supports_spill || ctx->scratch_enabled) + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); declare_vs_input_vgprs(ctx, &args); break; @@ -973,8 +971,8 @@ void add_startpgm(struct isel_context *ctx) assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr); add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx); - if (ctx->scratch_enabled) - add_arg(&args, s1, &ctx->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); + if (ctx->options->supports_spill || ctx->scratch_enabled) + add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); ctx->program->config->spi_ps_input_addr = 0; ctx->program->config->spi_ps_input_ena = 0; @@ -1039,8 +1037,8 @@ void add_startpgm(struct isel_context *ctx) if (ctx->program->info->cs.uses_local_invocation_idx) add_arg(&args, s1, &ctx->tg_size, idx++); - if (ctx->scratch_enabled) - add_arg(&args, s1, &ctx->scratch_offset, idx++); + if (ctx->options->supports_spill || ctx->scratch_enabled) + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); add_arg(&args, v1, &ctx->local_invocation_ids[0], vgpr_idx++); add_arg(&args, v1, &ctx->local_invocation_ids[1], vgpr_idx++); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 29aefef26cf..5a16afdca36 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1076,15 +1076,16 @@ public: bool wb_smem_l1_on_end = false; std::vector constant_data; + Temp private_segment_buffer; + Temp scratch_offset; uint16_t lds_alloc_granule; uint32_t lds_limit; /* in bytes */ - uint16_t vgpr_limit; - + uint16_t sgpr_limit; uint16_t physical_sgprs; uint16_t sgpr_alloc_granule; /* minus one. must be power of two */ - uint16_t sgpr_limit; + bool needs_vcc = false; bool needs_xnack_mask = false; bool needs_flat_scr = false; -- 2.30.2