radv/ac: add support for indirect access of descriptor sets.
authorDave Airlie <airlied@redhat.com>
Tue, 18 Apr 2017 00:21:59 +0000 (10:21 +1000)
committerDave Airlie <airlied@redhat.com>
Tue, 18 Apr 2017 23:00:43 +0000 (09:00 +1000)
We want to expose more descriptor sets to the applications,
but currently we have a 1:1 mapping between shader descriptor
sets and 2 user sgprs, limiting us to 4 per stage. This commit
checks whether there are enough user sgprs for the number of
sets bound for this shader; if not, we ask for them to be indirected.

Two sgprs are then used to point to a buffer of 64-bit pointers
to the allocated descriptor sets. All shaders point
to the same buffer.

We can use some user sgprs to inline one or two descriptor sets
in the future, but until we have a workload that needs this I don't
think we should spend too much time on it.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_nir_to_llvm.h
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h

index 31c3a6c22404af41e11939c311b75032eb76e66b..540de62d59482ee07c0c44d9d73eb382ac690993 100644 (file)
@@ -537,7 +537,7 @@ static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
        set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
 }
 
-#if 0
+
 static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
                                           uint32_t indirect_offset)
 {
@@ -546,7 +546,6 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin
        ud_info->indirect = true;
        ud_info->indirect_offset = indirect_offset;
 }
-#endif
 
 static void declare_tess_lds(struct nir_to_llvm_context *ctx)
 {
@@ -559,6 +558,7 @@ static void declare_tess_lds(struct nir_to_llvm_context *ctx)
 struct user_sgpr_info {
        bool need_ring_offsets;
        uint8_t sgpr_count;
+       bool indirect_all_descriptor_sets;
 };
 
 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
@@ -623,6 +623,8 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
                fprintf(stderr, "radv: TODO: add support for indirect sgprs\n");
                /* need to add support for indirect descriptor sets */
                assert(0);
+               user_sgpr_info->sgpr_count += 2;
+               user_sgpr_info->indirect_all_descriptor_sets = true;
        } else {
                user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
        }
@@ -645,11 +647,16 @@ static void create_function(struct nir_to_llvm_context *ctx)
        }
 
        /* 1 for each descriptor set */
-       for (unsigned i = 0; i < num_sets; ++i) {
-               if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
-                       array_params_mask |= (1 << arg_idx);
-                       arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
+       if (!user_sgpr_info.indirect_all_descriptor_sets) {
+               for (unsigned i = 0; i < num_sets; ++i) {
+                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
+                               array_params_mask |= (1 << arg_idx);
+                               arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
+                       }
                }
+       } else {
+               array_params_mask |= (1 << arg_idx);
+               arg_types[arg_idx++] = const_array(const_array(ctx->i8, 1024 * 1024), 32);
        }
 
        if (ctx->shader_info->info.needs_push_constants) {
@@ -801,14 +808,31 @@ static void create_function(struct nir_to_llvm_context *ctx)
                        ctx->ring_offsets = LLVMGetParam(ctx->main_function, arg_idx++);
        }
 
-       for (unsigned i = 0; i < num_sets; ++i) {
-               if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
-                       set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
-                       user_sgpr_idx += 2;
-                       ctx->descriptor_sets[i] =
-                               LLVMGetParam(ctx->main_function, arg_idx++);
-               } else
-                       ctx->descriptor_sets[i] = NULL;
+       if (!user_sgpr_info.indirect_all_descriptor_sets) {
+               for (unsigned i = 0; i < num_sets; ++i) {
+                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
+                               set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
+                               user_sgpr_idx += 2;
+                               ctx->descriptor_sets[i] =
+                                       LLVMGetParam(ctx->main_function, arg_idx++);
+                       } else
+                               ctx->descriptor_sets[i] = NULL;
+               }
+       } else {
+               uint32_t desc_sgpr_idx = user_sgpr_idx;
+               LLVMValueRef desc_sets = LLVMGetParam(ctx->main_function, arg_idx++);
+               set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx, 2);
+               user_sgpr_idx += 2;
+
+               for (unsigned i = 0; i < num_sets; ++i) {
+                       if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
+                               set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
+                               ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
+
+                       } else
+                               ctx->descriptor_sets[i] = NULL;
+               }
+               ctx->shader_info->need_indirect_descriptor_sets = true;
        }
 
        if (ctx->shader_info->info.needs_push_constants) {
index 7a4065ac5fabcf1b48f50a08052f1d7186410b40..401d284a7c4134f0ad33c58d6f5221cb329d7fd8 100644 (file)
@@ -83,7 +83,8 @@ struct ac_userdata_info {
 enum ac_ud_index {
        AC_UD_SCRATCH_RING_OFFSETS = 0,
        AC_UD_PUSH_CONSTANTS = 1,
-       AC_UD_SHADER_START = 2,
+       AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
+       AC_UD_SHADER_START = 3,
        AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
        AC_UD_VS_BASE_VERTEX_START_INSTANCE,
        AC_UD_VS_LS_TCS_IN_LAYOUT,
@@ -142,7 +143,7 @@ struct ac_shader_variant_info {
        unsigned num_user_sgprs;
        unsigned num_input_sgprs;
        unsigned num_input_vgprs;
-
+       bool need_indirect_descriptor_sets;
        union {
                struct {
                        struct ac_vs_output_info outinfo;
index 7056d0d0266a32e6b93d58a30b34395ba9576cf8..d95daff2c791db7191796ebc7b08cfc3b7b33164 100644 (file)
@@ -1246,7 +1246,7 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
        struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
        uint32_t base_reg = shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
 
-       if (desc_set_loc->sgpr_idx == -1)
+       if (desc_set_loc->sgpr_idx == -1 || desc_set_loc->indirect)
                return;
 
        assert(!desc_set_loc->indirect);
@@ -1313,18 +1313,73 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
        memcpy(ptr, set->mapped_ptr, set->size);
 }
 
+static void
+radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
+                                   struct radv_pipeline *pipeline)
+{
+       uint32_t size = MAX_SETS * 2 * 4;
+       uint32_t offset;
+       void *ptr;
+       
+       if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size,
+                                         256, &offset, &ptr))
+               return;
+
+       for (unsigned i = 0; i < MAX_SETS; i++) {
+               uint32_t *uptr = ((uint32_t *)ptr) + i * 2;
+               uint64_t set_va = 0;
+               struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
+               if (set)
+                       set_va = set->va;
+               uptr[0] = set_va & 0xffffffff;
+               uptr[1] = set_va >> 32;
+       }
+
+       uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+       va += offset;
+
+       if (pipeline->shaders[MESA_SHADER_VERTEX])
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+
+       if (pipeline->shaders[MESA_SHADER_FRAGMENT])
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_FRAGMENT,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+
+       if (radv_pipeline_has_gs(pipeline))
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_GEOMETRY,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+
+       if (radv_pipeline_has_tess(pipeline))
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_CTRL,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+
+       if (radv_pipeline_has_tess(pipeline))
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_TESS_EVAL,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+
+       if (pipeline->shaders[MESA_SHADER_COMPUTE])
+               radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_COMPUTE,
+                                          AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+}
+
 static void
 radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
                       struct radv_pipeline *pipeline,
                       VkShaderStageFlags stages)
 {
        unsigned i;
+
        if (!cmd_buffer->state.descriptors_dirty)
                return;
 
        if (cmd_buffer->state.push_descriptors_dirty)
                radv_flush_push_descriptors(cmd_buffer);
 
+       if (pipeline->need_indirect_descriptor_sets) {
+               radv_flush_indirect_descriptor_sets(cmd_buffer, pipeline);
+       }
+
        for (i = 0; i < MAX_SETS; i++) {
                if (!(cmd_buffer->state.descriptors_dirty & (1 << i)))
                        continue;
index cf11362eade948f761f7078b44a46f06235af678..8e71d59fae7f2ed1f3a49e9c06490cd453b3cb66 100644 (file)
@@ -2112,6 +2112,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
        calculate_pa_cl_vs_out_cntl(pipeline);
        calculate_ps_inputs(pipeline);
 
+       for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+               if (pipeline->shaders[i]) {
+                       pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+               }
+       }
+
        uint32_t stages = 0;
        if (radv_pipeline_has_tess(pipeline)) {
                stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
@@ -2270,6 +2276,7 @@ static VkResult radv_compute_pipeline_create(
                                       pipeline->layout, NULL);
 
 
+       pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
        result = radv_pipeline_scratch_init(device, pipeline);
        if (result != VK_SUCCESS) {
                radv_pipeline_destroy(device, pipeline, pAllocator);
index 4ace068742d14856d39fb1cb22c89df97e77cdc0..0d60aac106941eb456aabdf806457d2026ad2d8d 100644 (file)
@@ -1007,7 +1007,7 @@ struct radv_pipeline {
        struct radv_pipeline_layout *                 layout;
 
        bool                                         needs_data_cache;
-
+       bool                                         need_indirect_descriptor_sets;
        struct radv_shader_variant *                 shaders[MESA_SHADER_STAGES];
        struct radv_shader_variant *gs_copy_shader;
        VkShaderStageFlags                           active_stages;