aco: always set scratch_offset in startpgm
author Daniel Schürmann <daniel@schuermann.dev>
Wed, 30 Oct 2019 17:24:39 +0000 (18:24 +0100)
committer Daniel Schürmann <daniel@schuermann.dev>
Wed, 30 Oct 2019 19:48:33 +0000 (19:48 +0000)
This patch also moves private_segment_buffer and
scratch_offset to Program so that they can be accessed easily.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/compiler/aco_ir.h

index e935dc11a9be40a8bbe2e28997e0600fd33da56e..33242b7f6b7e3a42fe4e18553f62d6c883a06f8b 100644 (file)
@@ -4922,9 +4922,9 @@ void visit_shared_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
 Temp get_scratch_resource(isel_context *ctx)
 {
    Builder bld(ctx->program, ctx->block);
-   Temp scratch_addr = ctx->private_segment_buffer;
+   Temp scratch_addr = ctx->program->private_segment_buffer;
    if (ctx->stage != compute_cs)
-      scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), ctx->private_segment_buffer, Operand(0u));
+      scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand(0u));
 
    uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) |
                         S_008F0C_INDEX_STRIDE(ctx->options->wave_size == 64 ? 3 : 2);;
@@ -4971,11 +4971,11 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
          std::array<Temp,NIR_MAX_VEC_COMPONENTS> elems;
          Temp lower = bld.mubuf(aco_opcode::buffer_load_dwordx4,
                                 bld.def(v4), offset, rsrc,
-                                ctx->scratch_offset, 0, true);
+                                ctx->program->scratch_offset, 0, true);
          Temp upper = bld.mubuf(dst.size() == 6 ? aco_opcode::buffer_load_dwordx2 :
                                                   aco_opcode::buffer_load_dwordx4,
                                 dst.size() == 6 ? bld.def(v2) : bld.def(v4),
-                                offset, rsrc, ctx->scratch_offset, 16, true);
+                                offset, rsrc, ctx->program->scratch_offset, 16, true);
          emit_split_vector(ctx, lower, 2);
          elems[0] = emit_extract_vector(ctx, lower, 0, v2);
          elems[1] = emit_extract_vector(ctx, lower, 1, v2);
@@ -5000,7 +5000,7 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
          unreachable("Wrong dst size for nir_intrinsic_load_scratch");
    }
 
-   bld.mubuf(op, Definition(dst), offset, rsrc, ctx->scratch_offset, 0, true);
+   bld.mubuf(op, Definition(dst), offset, rsrc, ctx->program->scratch_offset, 0, true);
    emit_split_vector(ctx, dst, instr->num_components);
 }
 
@@ -5061,7 +5061,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
             unreachable("Invalid data size for nir_intrinsic_store_scratch.");
       }
 
-      bld.mubuf(op, offset, rsrc, ctx->scratch_offset, write_data, start * elem_size_bytes, true);
+      bld.mubuf(op, offset, rsrc, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
    }
 }
 
@@ -5292,6 +5292,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       Temp sample_pos;
       Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
       nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]);
+      Temp private_segment_buffer = ctx->program->private_segment_buffer;
       if (addr.type() == RegType::sgpr) {
          Operand offset;
          if (const_addr) {
@@ -5303,17 +5304,16 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
             offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr, Operand(3u));
             offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), addr, Operand(sample_pos_offset));
          }
-         addr = ctx->private_segment_buffer;
-         sample_pos = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand(offset));
+         sample_pos = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand(offset));
 
       } else if (ctx->options->chip_class >= GFX9) {
          addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), addr);
-         sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr, ctx->private_segment_buffer, sample_pos_offset);
+         sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr, private_segment_buffer, sample_pos_offset);
       } else {
-         /* addr += ctx->private_segment_buffer + sample_pos_offset */
+         /* addr += private_segment_buffer + sample_pos_offset */
          Temp tmp0 = bld.tmp(s1);
          Temp tmp1 = bld.tmp(s1);
-         bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1), ctx->private_segment_buffer);
+         bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1), private_segment_buffer);
          Definition scc_tmp = bld.def(s1, scc);
          tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0, Operand(sample_pos_offset));
          tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1, Operand(0u), bld.scc(scc_tmp.getTemp()));
index b65628c8521003eda90aae8fb62799796c0df369..3ab8ebb19eb1be5ce554bfffd0b4121dbd622b81 100644 (file)
@@ -97,8 +97,6 @@ struct isel_context {
 
    /* scratch */
    bool scratch_enabled = false;
-   Temp private_segment_buffer = Temp(0, s2); /* also the part of the scratch descriptor on compute */
-   Temp scratch_offset = Temp(0, s1);
 
    /* inputs common for merged stages */
    Temp merged_wave_info = Temp(0, s1);
@@ -929,7 +927,7 @@ void add_startpgm(struct isel_context *ctx)
 
    /* this needs to be in sgprs 0 and 1 */
    if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
-      add_arg(&args, s2, &ctx->private_segment_buffer, 0);
+      add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
       set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
    }
 
@@ -961,8 +959,8 @@ void add_startpgm(struct isel_context *ctx)
       else
          declare_streamout_sgprs(ctx, &args, &idx);
 
-      if (ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->scratch_offset, idx++);
+      if (ctx->options->supports_spill || ctx->scratch_enabled)
+         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       declare_vs_input_vgprs(ctx, &args);
       break;
@@ -973,8 +971,8 @@ void add_startpgm(struct isel_context *ctx)
       assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
       add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
 
-      if (ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+      if (ctx->options->supports_spill || ctx->scratch_enabled)
+         add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
 
       ctx->program->config->spi_ps_input_addr = 0;
       ctx->program->config->spi_ps_input_ena = 0;
@@ -1039,8 +1037,8 @@ void add_startpgm(struct isel_context *ctx)
 
       if (ctx->program->info->cs.uses_local_invocation_idx)
          add_arg(&args, s1, &ctx->tg_size, idx++);
-      if (ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->scratch_offset, idx++);
+      if (ctx->options->supports_spill || ctx->scratch_enabled)
+         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       add_arg(&args, v1, &ctx->local_invocation_ids[0], vgpr_idx++);
       add_arg(&args, v1, &ctx->local_invocation_ids[1], vgpr_idx++);
index 29aefef26cf3986251f81057780617d966df32f0..5a16afdca364928adad077d4dfc3868a92931ef0 100644 (file)
@@ -1076,15 +1076,16 @@ public:
    bool wb_smem_l1_on_end = false;
 
    std::vector<uint8_t> constant_data;
+   Temp private_segment_buffer;
+   Temp scratch_offset;
 
    uint16_t lds_alloc_granule;
    uint32_t lds_limit; /* in bytes */
-
    uint16_t vgpr_limit;
-
+   uint16_t sgpr_limit;
    uint16_t physical_sgprs;
    uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
-   uint16_t sgpr_limit;
+
    bool needs_vcc = false;
    bool needs_xnack_mask = false;
    bool needs_flat_scr = false;