radv: Replace supports_spill with explict_scratch_args
authorConnor Abbott <cwabbott0@gmail.com>
Tue, 12 Nov 2019 14:38:46 +0000 (15:38 +0100)
committerConnor Abbott <cwabbott0@gmail.com>
Mon, 25 Nov 2019 13:17:51 +0000 (14:17 +0100)
The former was always true and hence dead code. We will want to
explicitly declare the ring offset register with ACO, but we also want
to declare the scratch offset too, and we can't try to disable it since
ACO also supports spilling and the determination of whether spilling has
to happen occurs well after setting up registers. So replace
supports_spill with something that will actually be used for ACO.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h
src/amd/vulkan/radv_shader_args.c
src/amd/vulkan/radv_shader_args.h

index c3f2832fa8ffac6f069221f8397c6c8159f4f623..fbab89417cd3e7b8833ecffda48c0eeeb167159e 100644 (file)
@@ -95,9 +95,6 @@ struct isel_context {
       bool exec_potentially_empty = false;
    } cf_info;
 
-   /* scratch */
-   bool scratch_enabled = false;
-
    /* inputs common for merged stages */
    Temp merged_wave_info = Temp(0, s1);
 
@@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx,
       user_sgpr_info.need_ring_offsets = true;
 
    /* 2 user sgprs will nearly always be allocated for scratch/rings */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled)
-      user_sgpr_count += 2;
+   user_sgpr_count += 2;
 
    switch (ctx->stage) {
    case vertex_vs:
@@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
    arg_info args = {};
 
    /* this needs to be in sgprs 0 and 1 */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
-      add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
-      set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
-   }
+   add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
+   set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
 
    unsigned vgpr_idx = 0;
    switch (ctx->stage) {
@@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       else
          declare_streamout_sgprs(ctx, &args, &idx);
 
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       declare_vs_input_vgprs(ctx, &args);
       break;
@@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
       add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
 
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+      add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
 
       ctx->program->config->spi_ps_input_addr = 0;
       ctx->program->config->spi_ps_input_ena = 0;
@@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
 
       if (ctx->program->info->cs.uses_local_invocation_idx)
          add_arg(&args, s1, &ctx->tg_size, idx++);
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++);
       break;
@@ -1357,7 +1348,6 @@ setup_isel_context(Program* program,
    unsigned scratch_size = 0;
    for (unsigned i = 0; i < shader_count; i++)
       scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
-   ctx.scratch_enabled = scratch_size > 0;
    ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
 
    ctx.block = ctx.program->create_and_insert_block();
index 11f983974d67507cd3d180c312a1a09c54a60f6c..5d87b9a675a05c18d788ed276a340b3cc53d5755 100644 (file)
@@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx,
            ctx->max_workgroup_size,
            ctx->args->options);
 
-       if (ctx->args->options->supports_spill) {
-               ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
-                                                      LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
-                                                      NULL, 0, AC_FUNC_ATTR_READNONE);
-               ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
-                                                    ac_array_in_const_addr_space(ctx->ac.v4i32), "");
-       } else if (ctx->args->ring_offsets.used) {
-               ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets);
-       }
+       ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+                                              LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+                                              NULL, 0, AC_FUNC_ATTR_READNONE);
+       ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+                                            ac_array_in_const_addr_space(ctx->ac.v4i32), "");
 
        load_descriptor_sets(ctx);
 
index 81526c7eca0d2542275ede62ba2879b726a14e9f..a7253976f6737cebff246e0d13ee3625c34235e6 100644 (file)
@@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device,
                struct ac_llvm_compiler ac_llvm;
                bool thread_compiler;
 
-               if (options->supports_spill)
-                       tm_options |= AC_TM_SUPPORTS_SPILL;
+               tm_options |= AC_TM_SUPPORTS_SPILL;
                if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
                        tm_options |= AC_TM_SISCHED;
                if (options->check_ir)
@@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device,
        if (key)
                options.key = *key;
 
-       options.supports_spill = true;
+       options.explicit_scratch_args = use_aco;
        options.robust_buffer_access = device->robust_buffer_access;
 
        return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
index 0dde52e1303a8df08314333a5e67cad0d93a95ab..7ffce47bdd8e1eeb28b001d1a220c68b741ea089 100644 (file)
@@ -125,7 +125,7 @@ struct radv_shader_variant_key {
 struct radv_nir_compiler_options {
        struct radv_pipeline_layout *layout;
        struct radv_shader_variant_key key;
-       bool supports_spill;
+       bool explicit_scratch_args;
        bool clamp_shadow_reference;
        bool robust_buffer_access;
        bool dump_shader;
index 949b91dcf94396ca65c45d27248dda203a6c231f..bcec3e9d2e7de3cae564384b240e81f0955fec0f 100644 (file)
@@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
 }
 
 struct user_sgpr_info {
-       bool need_ring_offsets;
        bool indirect_all_descriptor_sets;
        uint8_t remaining_sgprs;
 };
@@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args,
 
        memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
 
-       /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
-       if (stage == MESA_SHADER_GEOMETRY ||
-           stage == MESA_SHADER_VERTEX ||
-           stage == MESA_SHADER_TESS_CTRL ||
-           stage == MESA_SHADER_TESS_EVAL ||
-           args->is_gs_copy_shader)
-               user_sgpr_info->need_ring_offsets = true;
-
-       if (stage == MESA_SHADER_FRAGMENT &&
-           args->shader_info->ps.needs_sample_positions)
-               user_sgpr_info->need_ring_offsets = true;
-
-       /* 2 user sgprs will nearly always be allocated for scratch/rings */
-       if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
-               user_sgpr_count += 2;
-       }
+       /* 2 user sgprs will always be allocated for scratch/rings */
+       user_sgpr_count += 2;
 
        switch (stage) {
        case MESA_SHADER_COMPUTE:
@@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
        allocate_user_sgprs(args, stage, has_previous_stage,
                            previous_stage, needs_view_index, &user_sgpr_info);
 
-       if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
+       if (args->options->explicit_scratch_args) {
                ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
                           &args->ring_offsets);
        }
@@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
                                   &args->ac.tg_size);
                }
 
+               if (args->options->explicit_scratch_args) {
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                  &args->scratch_offset);
+               }
+
                ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
                           &args->ac.local_invocation_ids);
                break;
@@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
                        declare_streamout_sgprs(args, stage);
                }
 
+               if (args->options->explicit_scratch_args) {
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                  &args->scratch_offset);
+               }
+
                declare_vs_input_vgprs(args);
                break;
        case MESA_SHADER_TESS_CTRL:
@@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
                                   &args->tess_factor_offset);
 
-                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 
@@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
                                   &args->tess_factor_offset);
+                       if (args->options->explicit_scratch_args) {
+                               ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                          &args->scratch_offset);
+                       }
                        ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
                                   &args->ac.tcs_patch_id);
                        ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
                        declare_streamout_sgprs(args, stage);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
                }
+               if (args->options->explicit_scratch_args) {
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                  &args->scratch_offset);
+               }
                declare_tes_input_vgprs(args);
                break;
        case MESA_SHADER_GEOMETRY:
@@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
                                   &args->merged_wave_info);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
 
-                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 
@@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
 
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
                        ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
+                       if (args->options->explicit_scratch_args) {
+                               ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                          &args->scratch_offset);
+                       }
                        ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
                                   &args->gs_vtx_offset[0]);
                        ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
                declare_global_input_sgprs(args, &user_sgpr_info);
 
                ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+               if (args->options->explicit_scratch_args) {
+                       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+                                  &args->scratch_offset);
+               }
                ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
                ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
                ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
@@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
        }
 
        args->shader_info->num_input_vgprs = 0;
-       args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
+       args->shader_info->num_input_sgprs = 2;
        args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
 
        if (stage != MESA_SHADER_FRAGMENT)
@@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args,
 
        uint8_t user_sgpr_idx = 0;
 
-       if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
-               set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
-                                  &user_sgpr_idx);
-       }
+       set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
+                          &user_sgpr_idx);
 
        /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
         * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
index a7442c617de0286e56734b54977395a615531734..0f57058bd6e70f9265e6953df9dbd702ae081a8e 100644 (file)
@@ -34,6 +34,7 @@ struct radv_shader_args {
 
        struct ac_arg descriptor_sets[MAX_SETS];
        struct ac_arg ring_offsets;
+       struct ac_arg scratch_offset;
 
        struct ac_arg vertex_buffers;
        struct ac_arg rel_auto_id;