radeonsi: merge constant and shader buffers descriptor lists into one
author Marek Olšák <marek.olsak@amd.com>
Sat, 13 May 2017 12:01:27 +0000 (14:01 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 18 May 2017 20:15:02 +0000 (22:15 +0200)
Constant buffers: slot[16], .. slot[31] (ascending)
Shader buffers: slot[15], .. slot[0] (descending)

The idea is that if we have 4 constant buffers and 2 shader buffers, we only
have to upload 6 slots. That optimization is left for a later commit.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
src/gallium/drivers/radeonsi/si_state.h

index d1159ad5e27c6fe206c81d47d816d86070a49fdf..25c3882a115e35b1b67242536bb36108eede6ad3 100644 (file)
@@ -380,23 +380,24 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
        }
 }
 
+typedef unsigned (*slot_remap_func)(unsigned);
+
 static void si_dump_descriptor_list(struct si_descriptors *desc,
                                    const char *shader_name,
                                    const char *elem_name,
                                    unsigned num_elements,
+                                   slot_remap_func slot_remap,
                                    FILE *f)
 {
        unsigned i, j;
-       uint32_t *cpu_list = desc->list;
-       uint32_t *gpu_list = desc->gpu_list;
-       const char *list_note = "GPU list";
-
-       if (!gpu_list) {
-               gpu_list = cpu_list;
-               list_note = "CPU list";
-       }
 
        for (i = 0; i < num_elements; i++) {
+               unsigned dw_offset = slot_remap(i) * desc->element_dw_size;
+               uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
+               const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
+               uint32_t *cpu_list = desc->list + dw_offset;
+               uint32_t *gpu_list = gpu_ptr + dw_offset;
+
                fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
                        shader_name, elem_name, i, list_note);
 
@@ -444,11 +445,14 @@ static void si_dump_descriptor_list(struct si_descriptors *desc,
                }
 
                fprintf(f, "\n");
-               gpu_list += desc->element_dw_size;
-               cpu_list += desc->element_dw_size;
        }
 }
 
+static unsigned si_identity(unsigned slot)
+{
+       return slot;
+}
+
 static void si_dump_descriptors(struct si_context *sctx,
                                enum pipe_shader_type processor,
                                const struct tgsi_shader_info *info, FILE *f)
@@ -464,9 +468,16 @@ static void si_dump_descriptors(struct si_context *sctx,
                " - Sampler",
                " - Image",
        };
+       static const slot_remap_func remap_func[] = {
+               si_get_constbuf_slot,
+               si_get_shaderbuf_slot,
+               si_identity,
+               si_identity,
+       };
        unsigned enabled_slots[] = {
-               sctx->const_buffers[processor].enabled_mask,
-               sctx->shader_buffers[processor].enabled_mask,
+               sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS,
+               util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask &
+                               u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS)),
                sctx->samplers[processor].views.enabled_mask,
                sctx->images[processor].enabled_mask,
        };
@@ -481,12 +492,14 @@ static void si_dump_descriptors(struct si_context *sctx,
                assert(info); /* only CS may not have an info struct */
 
                si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor],
-                                       " - Vertex buffer", info->num_inputs, f);
+                                       " - Vertex buffer", info->num_inputs,
+                                       si_identity, f);
        }
 
        for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs)
                si_dump_descriptor_list(descs, shader_name[processor], elem_name[i],
-                                       util_last_bit(enabled_slots[i] | required_slots[i]), f);
+                                       util_last_bit(enabled_slots[i] | required_slots[i]),
+                                       remap_func[i], f);
 }
 
 static void si_dump_gfx_descriptors(struct si_context *sctx,
@@ -805,7 +818,8 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
                }
 
                si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
-                                       "", "RW buffers", SI_NUM_RW_BUFFERS, f);
+                                       "", "RW buffers", SI_NUM_RW_BUFFERS,
+                                       si_identity, f);
                si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
                si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
                si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
index c92a6575ede4b613fe2b2fe58305b3441d4f503d..2e3a9c5f9e221988d9bbeb7b8a299d21ea46a3d8 100644 (file)
@@ -936,11 +936,15 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
                                     unsigned num_buffers,
                                     unsigned shader_userdata_index,
                                     enum radeon_bo_usage shader_usage,
+                                    enum radeon_bo_usage shader_usage_constbuf,
                                     enum radeon_bo_priority priority,
+                                    enum radeon_bo_priority priority_constbuf,
                                     unsigned *ce_offset)
 {
        buffers->shader_usage = shader_usage;
+       buffers->shader_usage_constbuf = shader_usage_constbuf;
        buffers->priority = priority;
+       buffers->priority_constbuf = priority_constbuf;
        buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
 
        si_init_descriptors(descs, shader_userdata_index, 4,
@@ -969,8 +973,11 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
                int i = u_bit_scan(&mask);
 
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                                     (struct r600_resource*)buffers->buffers[i],
-                                     buffers->shader_usage, buffers->priority);
+                       r600_resource(buffers->buffers[i]),
+                       i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
+                                                   buffers->shader_usage_constbuf,
+                       i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
+                                                   buffers->priority_constbuf);
        }
 }
 
@@ -1119,16 +1126,16 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 /* CONSTANT BUFFERS */
 
 static unsigned
-si_const_buffer_descriptors_idx(unsigned shader)
+si_const_and_shader_buffer_descriptors_idx(unsigned shader)
 {
        return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-              SI_SHADER_DESCS_CONST_BUFFERS;
+              SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
 }
 
 static struct si_descriptors *
-si_const_buffer_descriptors(struct si_context *sctx, unsigned shader)
+si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
 {
-       return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)];
+       return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
 }
 
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
@@ -1199,8 +1206,8 @@ static void si_set_constant_buffer(struct si_context *sctx,
                buffers->buffers[slot] = buffer;
                radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
                                                    (struct r600_resource*)buffer,
-                                                   buffers->shader_usage,
-                                                   buffers->priority, true);
+                                                   buffers->shader_usage_constbuf,
+                                                   buffers->priority_constbuf, true);
                buffers->enabled_mask |= 1u << slot;
        } else {
                /* Clear the descriptor. */
@@ -1228,8 +1235,9 @@ static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
        if (shader >= SI_NUM_SHADERS)
                return;
 
-       si_set_constant_buffer(sctx, &sctx->const_buffers[shader],
-                              si_const_buffer_descriptors_idx(shader),
+       slot = si_get_constbuf_slot(slot);
+       si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
+                              si_const_and_shader_buffer_descriptors_idx(shader),
                               slot, input);
 }
 
@@ -1238,35 +1246,22 @@ void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
 {
        cbuf->user_buffer = NULL;
        si_get_buffer_from_descriptors(
-               &sctx->const_buffers[shader],
-               si_const_buffer_descriptors(sctx, shader),
-               slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
+               &sctx->const_and_shader_buffers[shader],
+               si_const_and_shader_buffer_descriptors(sctx, shader),
+               si_get_constbuf_slot(slot),
+               &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
 }
 
 /* SHADER BUFFERS */
 
-static unsigned
-si_shader_buffer_descriptors_idx(enum pipe_shader_type shader)
-{
-       return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
-              SI_SHADER_DESCS_SHADER_BUFFERS;
-}
-
-static struct si_descriptors *
-si_shader_buffer_descriptors(struct si_context *sctx,
-                                 enum pipe_shader_type shader)
-{
-       return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)];
-}
-
 static void si_set_shader_buffers(struct pipe_context *ctx,
                                  enum pipe_shader_type shader,
                                  unsigned start_slot, unsigned count,
                                  const struct pipe_shader_buffer *sbuffers)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
-       struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
+       struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
+       struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
        unsigned i;
 
        assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
@@ -1274,7 +1269,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
        for (i = 0; i < count; ++i) {
                const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
                struct r600_resource *buf;
-               unsigned slot = start_slot + i;
+               unsigned slot = si_get_shaderbuf_slot(start_slot + i);
                uint32_t *desc = descs->list + slot * 4;
                uint64_t va;
 
@@ -1284,7 +1279,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
                        buffers->enabled_mask &= ~(1u << slot);
                        descs->dirty_mask |= 1u << slot;
                        sctx->descriptors_dirty |=
-                               1u << si_shader_buffer_descriptors_idx(shader);
+                               1u << si_const_and_shader_buffer_descriptors_idx(shader);
                        continue;
                }
 
@@ -1311,7 +1306,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx,
                buffers->enabled_mask |= 1u << slot;
                descs->dirty_mask |= 1u << slot;
                sctx->descriptors_dirty |=
-                       1u << si_shader_buffer_descriptors_idx(shader);
+                       1u << si_const_and_shader_buffer_descriptors_idx(shader);
 
                util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
                               sbuffer->buffer_offset + sbuffer->buffer_size);
@@ -1323,12 +1318,13 @@ void si_get_shader_buffers(struct si_context *sctx,
                           uint start_slot, uint count,
                           struct pipe_shader_buffer *sbuf)
 {
-       struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
-       struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader);
+       struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
+       struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
 
        for (unsigned i = 0; i < count; ++i) {
                si_get_buffer_from_descriptors(
-                       buffers, descs, start_slot + i,
+                       buffers, descs,
+                       si_get_shaderbuf_slot(start_slot + i),
                        &sbuf[i].buffer, &sbuf[i].buffer_offset,
                        &sbuf[i].buffer_size);
        }
@@ -1603,11 +1599,14 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
 static void si_reset_buffer_resources(struct si_context *sctx,
                                      struct si_buffer_resources *buffers,
                                      unsigned descriptors_idx,
+                                     unsigned slot_mask,
                                      struct pipe_resource *buf,
-                                     uint64_t old_va)
+                                     uint64_t old_va,
+                                     enum radeon_bo_usage usage,
+                                     enum radeon_bo_priority priority)
 {
        struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
-       unsigned mask = buffers->enabled_mask;
+       unsigned mask = buffers->enabled_mask & slot_mask;
 
        while (mask) {
                unsigned i = u_bit_scan(&mask);
@@ -1620,8 +1619,7 @@ static void si_reset_buffer_resources(struct si_context *sctx,
 
                        radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
                                                            (struct r600_resource *)buf,
-                                                           buffers->shader_usage,
-                                                           buffers->priority, true);
+                                                           usage, priority, true);
                }
        }
 }
@@ -1690,16 +1688,22 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
        /* Constant and shader buffers. */
        if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
                for (shader = 0; shader < SI_NUM_SHADERS; shader++)
-                       si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
-                                                 si_const_buffer_descriptors_idx(shader),
-                                                 buf, old_va);
+                       si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
+                                                 si_const_and_shader_buffer_descriptors_idx(shader),
+                                                 u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
+                                                 buf, old_va,
+                                                 sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
+                                                 sctx->const_and_shader_buffers[shader].priority_constbuf);
        }
 
        if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
                for (shader = 0; shader < SI_NUM_SHADERS; shader++)
-                       si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
-                                                 si_shader_buffer_descriptors_idx(shader),
-                                                 buf, old_va);
+                       si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
+                                                 si_const_and_shader_buffer_descriptors_idx(shader),
+                                                 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
+                                                 buf, old_va,
+                                                 sctx->const_and_shader_buffers[shader].shader_usage,
+                                                 sctx->const_and_shader_buffers[shader].priority);
        }
 
        if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
@@ -2000,8 +2004,8 @@ void si_init_all_descriptors(struct si_context *sctx)
        int i;
        unsigned ce_offset = 0;
 
-       STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
-       STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
+       STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
+       STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
 
        for (i = 0; i < SI_NUM_SHADERS; i++) {
                bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
@@ -2013,27 +2017,23 @@ void si_init_all_descriptors(struct si_context *sctx)
                 */
                bool big_ce = sctx->b.chip_class <= VI;
                bool images_use_ce = big_ce;
-               bool shaderbufs_use_ce = big_ce ||
-                                        i == PIPE_SHADER_COMPUTE;
+               bool const_and_shaderbufs_use_ce = big_ce ||
+                                                  i == PIPE_SHADER_VERTEX ||
+                                                  i == PIPE_SHADER_FRAGMENT;
                bool samplers_use_ce = big_ce ||
                                       i == PIPE_SHADER_FRAGMENT;
 
-               si_init_buffer_resources(&sctx->const_buffers[i],
-                                        si_const_buffer_descriptors(sctx, i),
-                                        SI_NUM_CONST_BUFFERS,
-                                        gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
-                                        gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
-                                                  SI_SGPR_CONST_BUFFERS,
-                                        RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
-                                        &ce_offset);
-               si_init_buffer_resources(&sctx->shader_buffers[i],
-                                        si_shader_buffer_descriptors(sctx, i),
-                                        SI_NUM_SHADER_BUFFERS,
-                                        gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
-                                        gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
-                                                  SI_SGPR_SHADER_BUFFERS,
-                                        RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
-                                        shaderbufs_use_ce ? &ce_offset : NULL);
+               si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
+                                        si_const_and_shader_buffer_descriptors(sctx, i),
+                                        SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS,
+                                        gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
+                                        gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
+                                                  SI_SGPR_CONST_AND_SHADER_BUFFERS,
+                                        RADEON_USAGE_READWRITE,
+                                        RADEON_USAGE_READ,
+                                        RADEON_PRIO_SHADER_RW_BUFFER,
+                                        RADEON_PRIO_CONST_BUFFER,
+                                        const_and_shaderbufs_use_ce ? &ce_offset : NULL);
 
                si_init_descriptors(si_sampler_descriptors(sctx, i),
                                    gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
@@ -2055,7 +2055,10 @@ void si_init_all_descriptors(struct si_context *sctx)
        si_init_buffer_resources(&sctx->rw_buffers,
                                 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
                                 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
-                                RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS,
+                                /* The second set of usage/priority is used by
+                                 * const buffers in RW buffer slots. */
+                                RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
+                                RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
                                 &ce_offset);
        si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
                            4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
@@ -2148,10 +2151,8 @@ void si_release_all_descriptors(struct si_context *sctx)
        int i;
 
        for (i = 0; i < SI_NUM_SHADERS; i++) {
-               si_release_buffer_resources(&sctx->const_buffers[i],
-                                           si_const_buffer_descriptors(sctx, i));
-               si_release_buffer_resources(&sctx->shader_buffers[i],
-                                           si_shader_buffer_descriptors(sctx, i));
+               si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
+                                           si_const_and_shader_buffer_descriptors(sctx, i));
                si_release_sampler_views(&sctx->samplers[i].views);
                si_release_image_views(&sctx->images[i]);
        }
@@ -2168,8 +2169,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
        int i;
 
        for (i = 0; i < SI_NUM_SHADERS; i++) {
-               si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
-               si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
+               si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
                si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
                si_image_views_begin_new_cs(sctx, &sctx->images[i]);
        }
index 431d8a3a4290f34e8cd3e6e8f0e7623bef8a35bb..449a802f76b90723ec52e2c1f2412e50bb459417 100644 (file)
@@ -295,8 +295,7 @@ struct si_context {
        unsigned                        shader_pointers_dirty;
        unsigned                        compressed_tex_shader_mask;
        struct si_buffer_resources      rw_buffers;
-       struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
-       struct si_buffer_resources      shader_buffers[SI_NUM_SHADERS];
+       struct si_buffer_resources      const_and_shader_buffers[SI_NUM_SHADERS];
        struct si_textures_info         samplers[SI_NUM_SHADERS];
        struct si_images_info           images[SI_NUM_SHADERS];
 
index a49449b4ef5b407faa9eb90ef4de7bf82515ec5e..8c5bcb9f5763a65ce76d50790d9eb64563e6deb5 100644 (file)
@@ -1726,10 +1726,10 @@ static void declare_compute_memory(struct si_shader_context *ctx,
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
        LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
-                                            ctx->param_const_buffers);
+                                            ctx->param_const_and_shader_buffers);
 
        return ac_build_indexed_load_const(&ctx->ac, list_ptr,
-                                       LLVMConstInt(ctx->i32, i, 0));
+                       LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
 }
 
 static LLVMValueRef fetch_constant(
@@ -1759,11 +1759,13 @@ static LLVMValueRef fetch_constant(
        idx = reg->Register.Index * 4 + swizzle;
 
        if (reg->Register.Dimension && reg->Dimension.Indirect) {
-               LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers);
+               LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
                LLVMValueRef index;
                index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
                                                      reg->Dimension.Index,
                                                      SI_NUM_CONST_BUFFERS);
+               index = LLVMBuildAdd(ctx->gallivm.builder, index,
+                                    LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
                bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
        } else
                bufp = load_const_buffer_desc(ctx, buf);
@@ -2796,13 +2798,11 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
 
        unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-                                          8 + GFX9_SGPR_TCS_CONST_BUFFERS);
+                                          8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
                                           8 + GFX9_SGPR_TCS_SAMPLERS);
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
                                           8 + GFX9_SGPR_TCS_IMAGES);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
-                                          8 + GFX9_SGPR_TCS_SHADER_BUFFERS);
 
        unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
        ret = si_insert_input_ret_float(ctx, ret,
@@ -2825,13 +2825,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
 
        unsigned desc_param = ctx->param_vs_state_bits + 1;
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
-                                          8 + GFX9_SGPR_GS_CONST_BUFFERS);
+                                          8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
                                           8 + GFX9_SGPR_GS_SAMPLERS);
        ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
                                           8 + GFX9_SGPR_GS_IMAGES);
-       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
-                                          8 + GFX9_SGPR_GS_SHADER_BUFFERS);
 
        unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
        for (unsigned i = 0; i < 5; i++) {
@@ -4061,16 +4059,15 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
                                            unsigned *num_params,
                                            bool assign_params)
 {
-       params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
+       params[(*num_params)++] = si_const_array(ctx->v4i32,
+                                                SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
        params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
        params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
-       params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
 
        if (assign_params) {
-               ctx->param_const_buffers  = *num_params - 4;
-               ctx->param_samplers       = *num_params - 3;
-               ctx->param_images         = *num_params - 2;
-               ctx->param_shader_buffers = *num_params - 1;
+               ctx->param_const_and_shader_buffers = *num_params - 3;
+               ctx->param_samplers       = *num_params - 2;
+               ctx->param_images         = *num_params - 1;
        }
 }
 
@@ -6670,7 +6667,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
-               params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
@@ -6685,7 +6681,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
                params[num_params++] = ctx->i64;
-               params[num_params++] = ctx->i64;
                params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
                params[num_params++] = ctx->i32;
@@ -7042,10 +7037,9 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 
        /* Declare input SGPRs. */
        params[ctx->param_rw_buffers = num_params++] = ctx->i64;
-       params[ctx->param_const_buffers = num_params++] = ctx->i64;
+       params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
        params[ctx->param_samplers = num_params++] = ctx->i64;
        params[ctx->param_images = num_params++] = ctx->i64;
-       params[ctx->param_shader_buffers = num_params++] = ctx->i64;
        assert(num_params == SI_PARAM_ALPHA_REF);
        params[SI_PARAM_ALPHA_REF] = ctx->f32;
        last_sgpr = SI_PARAM_ALPHA_REF;
index 1627de3980be85eb81a3c9d0aaa854172faec47d..08e809c56b74c6a1b19ec86970517ba3d3fc9127 100644 (file)
@@ -157,14 +157,12 @@ enum {
         */
        SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
        SI_SGPR_RW_BUFFERS_HI,
-       SI_SGPR_CONST_BUFFERS,
-       SI_SGPR_CONST_BUFFERS_HI,
+       SI_SGPR_CONST_AND_SHADER_BUFFERS,
+       SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
        SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
        SI_SGPR_SAMPLERS_HI,
        SI_SGPR_IMAGES,
        SI_SGPR_IMAGES_HI,
-       SI_SGPR_SHADER_BUFFERS,
-       SI_SGPR_SHADER_BUFFERS_HI,
        SI_NUM_RESOURCE_SGPRS,
 
        /* all VS variants */
@@ -197,25 +195,21 @@ enum {
        GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K,
        GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K,
        GFX9_SGPR_unused_to_align_the_next_pointer,
-       GFX9_SGPR_TCS_CONST_BUFFERS,
-       GFX9_SGPR_TCS_CONST_BUFFERS_HI,
+       GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS,
+       GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI,
        GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
        GFX9_SGPR_TCS_SAMPLERS_HI,
        GFX9_SGPR_TCS_IMAGES,
        GFX9_SGPR_TCS_IMAGES_HI,
-       GFX9_SGPR_TCS_SHADER_BUFFERS,
-       GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
        GFX9_TCS_NUM_USER_SGPR,
 
        /* GFX9: Merged ES-GS (VS-GS or TES-GS). */
-       GFX9_SGPR_GS_CONST_BUFFERS = SI_VS_NUM_USER_SGPR,
-       GFX9_SGPR_GS_CONST_BUFFERS_HI,
+       GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR,
+       GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI,
        GFX9_SGPR_GS_SAMPLERS,
        GFX9_SGPR_GS_SAMPLERS_HI,
        GFX9_SGPR_GS_IMAGES,
        GFX9_SGPR_GS_IMAGES_HI,
-       GFX9_SGPR_GS_SHADER_BUFFERS,
-       GFX9_SGPR_GS_SHADER_BUFFERS_HI,
        GFX9_GS_NUM_USER_SGPR,
 
        /* GS limits */
@@ -229,7 +223,7 @@ enum {
 
 /* LLVM function parameter indices */
 enum {
-       SI_NUM_RESOURCE_PARAMS = 5,
+       SI_NUM_RESOURCE_PARAMS = 4,
 
        /* PS only parameters */
        SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
index 69e6dfcf920862a969c40a2bfaa57fd270dfb2bd..9fd027d4cbd62c8bf9b67ae45e0dcaa2a07b026c 100644 (file)
@@ -109,10 +109,9 @@ struct si_shader_context {
 
        /* Parameter indices for LLVMGetParam. */
        int param_rw_buffers;
-       int param_const_buffers;
+       int param_const_and_shader_buffers;
        int param_samplers;
        int param_images;
-       int param_shader_buffers;
        /* Common inputs for merged shaders. */
        int param_merged_wave_info;
        int param_merged_scratch_offset;
index a47d5bbae3beb2516afd5027e41256ffa4c90bb0..1e5842b818f8dc7c1f9a61a6749a3693fb48e80b 100644 (file)
@@ -84,14 +84,19 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
 {
        LLVMValueRef index;
        LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
-                                            ctx->param_shader_buffers);
+                                            ctx->param_const_and_shader_buffers);
 
-       if (!reg->Register.Indirect)
-               index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
-       else
+       if (!reg->Register.Indirect) {
+               index = LLVMConstInt(ctx->i32,
+                                    si_get_shaderbuf_slot(reg->Register.Index), 0);
+       } else {
                index = si_get_bounded_indirect_index(ctx, &reg->Indirect,
                                                      reg->Register.Index,
                                                      SI_NUM_SHADER_BUFFERS);
+               index = LLVMBuildSub(ctx->gallivm.builder,
+                                    LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
+                                    index, "");
+       }
 
        return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
 }
index 629d614f7fcf6bcd8167666603c7dd1974222507..90d097209685d5b8c4248a525a75fbd2852cd2ee 100644 (file)
@@ -194,11 +194,12 @@ enum {
  *  21 - compute const buffers
  *   ...
  */
-#define SI_SHADER_DESCS_CONST_BUFFERS  0
-#define SI_SHADER_DESCS_SHADER_BUFFERS 1
-#define SI_SHADER_DESCS_SAMPLERS       2
-#define SI_SHADER_DESCS_IMAGES         3
-#define SI_NUM_SHADER_DESCS            4
+enum {
+       SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
+       SI_SHADER_DESCS_SAMPLERS,
+       SI_SHADER_DESCS_IMAGES,
+       SI_NUM_SHADER_DESCS,
+};
 
 #define SI_DESCS_RW_BUFFERS            0
 #define SI_DESCS_FIRST_SHADER          1
@@ -251,7 +252,9 @@ struct si_sampler_views {
 
 struct si_buffer_resources {
        enum radeon_bo_usage            shader_usage; /* READ, WRITE, or READWRITE */
+       enum radeon_bo_usage            shader_usage_constbuf;
        enum radeon_bo_priority         priority;
+       enum radeon_bo_priority         priority_constbuf;
        struct pipe_resource            **buffers; /* this has num_buffers elements */
 
        /* The i-th bit is set if that element is enabled (non-NULL resource). */
@@ -372,4 +375,16 @@ si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
                return rtex->surface.u.legacy.tiling_index[level];
 }
 
+static inline unsigned si_get_constbuf_slot(unsigned slot)
+{
+       /* Constant buffers are in slots [16..31], ascending */
+       return SI_NUM_SHADER_BUFFERS + slot;
+}
+
+static inline unsigned si_get_shaderbuf_slot(unsigned slot)
+{
+       /* shader buffers are in slots [15..0], descending */
+       return SI_NUM_SHADER_BUFFERS - 1 - slot;
+}
+
 #endif