radeonsi: rework uploading border colors
author Marek Olšák <marek.olsak@amd.com>
Sun, 30 Aug 2015 12:13:10 +0000 (14:13 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 1 Sep 2015 19:51:15 +0000 (21:51 +0200)
The border colors are uploaded only once when the state is created.

This brings truly immutable sampler descriptors, because they don't have
to be updated every time a sampler state is re-bound.

It also moves the TA_BC_BASE_ADDR registers to init_config, removing one
more state. The catch is there is now a limit: only 4096 border colors can
be used by one context. I don't think that will be a problem.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h

index 3041da621c3bc756c068b8209acbe51f2f3ee97a..92a7068e7159340c2b13ff45e4d177b05326abba 100644 (file)
@@ -273,13 +273,17 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
                              RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
 }
 
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-                               unsigned start, unsigned count, void **states)
+static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
+                                   unsigned start, unsigned count, void **states)
 {
+       struct si_context *sctx = (struct si_context *)ctx;
        struct si_sampler_states *samplers = &sctx->samplers[shader].states;
        struct si_sampler_state **sstates = (struct si_sampler_state**)states;
        int i;
 
+       if (!count || shader >= SI_NUM_SHADERS)
+               return;
+
        if (start == 0)
                samplers->saved_states[0] = states[0];
        if (start == 1)
@@ -1022,6 +1026,7 @@ void si_init_all_descriptors(struct si_context *sctx)
                            4, SI_NUM_VERTEX_BUFFERS);
 
        /* Set pipe_context functions. */
+       sctx->b.b.bind_sampler_states = si_bind_sampler_states;
        sctx->b.b.set_constant_buffer = si_set_constant_buffer;
        sctx->b.b.set_sampler_views = si_set_sampler_views;
        sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
index da77478952536985eeaf15cd66d08b823ed5ca90..d68ea5fb31d684753907a5de12e0a4758fdbcb82 100644 (file)
@@ -44,7 +44,8 @@ static void si_destroy_context(struct pipe_context *context)
        pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->tf_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
-       r600_resource_reference(&sctx->border_color_table, NULL);
+       r600_resource_reference(&sctx->border_color_buffer, NULL);
+       free(sctx->border_color_table);
        r600_resource_reference(&sctx->scratch_buffer, NULL);
        sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
@@ -139,6 +140,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                                sscreen->b.trace_bo->cs_buf : NULL);
        sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
+       /* Border colors. */
+       sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
+                                         sizeof(*sctx->border_color_table));
+       if (!sctx->border_color_table)
+               goto fail;
+
+       sctx->border_color_buffer = (struct r600_resource*)
+               pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
+                                  SI_MAX_BORDER_COLORS *
+                                  sizeof(*sctx->border_color_table));
+       if (!sctx->border_color_buffer)
+               goto fail;
+
+       sctx->border_color_map =
+               ws->buffer_map(sctx->border_color_buffer->cs_buf,
+                              NULL, PIPE_TRANSFER_WRITE);
+       if (!sctx->border_color_map)
+               goto fail;
+
        si_init_all_descriptors(sctx);
        si_init_state_functions(sctx);
        si_init_shader_functions(sctx);
@@ -197,6 +217,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
        return &sctx->b.b;
 fail:
+       fprintf(stderr, "radeonsi: Failed to create a context.\n");
        si_destroy_context(&sctx->b.b);
        return NULL;
 }
index 02d75f00f98f447a8626f0573a0f4da0fe772592..847853e59e95319d02b5a3e0da1a5dfdd97c73b3 100644 (file)
@@ -79,6 +79,7 @@
 #define SI_GET_TRACE_POINT_ID(x)       ((x) & 0xffff)
 
 #define SI_MAX_VIEWPORTS       16
+#define SI_MAX_BORDER_COLORS   4096
 
 struct si_compute;
 
@@ -103,7 +104,6 @@ struct si_sampler_view {
 
 struct si_sampler_state {
        uint32_t                        val[4];
-       uint32_t                        border_color[4];
 };
 
 struct si_cs_shader_state {
@@ -219,8 +219,10 @@ struct si_context {
        struct pipe_resource            *esgs_ring;
        struct pipe_resource            *gsvs_ring;
        struct pipe_resource            *tf_ring;
-       struct r600_resource            *border_color_table;
-       unsigned                        border_color_offset;
+       union pipe_color_union          *border_color_table; /* in CPU memory, any endian */
+       struct r600_resource            *border_color_buffer;
+       union pipe_color_union          *border_color_map; /* in VRAM (slow access), little endian */
+       unsigned                        border_color_count;
 
        /* Vertex and index buffers. */
        bool                            vertex_buffers_dirty;
index 52fa8fec033664ae2c14c8661431a9a34fa7ea5d..e31895d6933887f8c451be809b71eb0e53a0b56c 100644 (file)
@@ -2701,9 +2701,10 @@ static bool sampler_state_needs_border_color(const struct pipe_sampler_state *st
 static void *si_create_sampler_state(struct pipe_context *ctx,
                                     const struct pipe_sampler_state *state)
 {
+       struct si_context *sctx = (struct si_context *)ctx;
        struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
        unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
-       unsigned border_color_type;
+       unsigned border_color_type, border_color_index = 0;
 
        if (rstate == NULL) {
                return NULL;
@@ -2726,9 +2727,38 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                 state->border_color.f[2] == 1 &&
                 state->border_color.f[3] == 1)
                border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
-       else
+       else {
+               int i;
+
                border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
 
+               /* Check if the border has been uploaded already. */
+               for (i = 0; i < sctx->border_color_count; i++)
+                       if (memcmp(&sctx->border_color_table[i], &state->border_color,
+                                  sizeof(state->border_color)) == 0)
+                               break;
+
+               if (i >= SI_MAX_BORDER_COLORS) {
+                       /* Getting 4096 unique border colors is very unlikely. */
+                       fprintf(stderr, "radeonsi: The border color table is full. "
+                               "Any new border colors will be just black. "
+                               "Please file a bug.\n");
+                       border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+               } else {
+                       if (i == sctx->border_color_count) {
+                               /* Upload a new border color. */
+                               memcpy(&sctx->border_color_table[i], &state->border_color,
+                                      sizeof(state->border_color));
+                               util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
+                                                       &state->border_color,
+                                                       sizeof(state->border_color));
+                               sctx->border_color_count++;
+                       }
+
+                       border_color_index = i;
+               }
+       }
+
        rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
                          S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
                          S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
@@ -2742,89 +2772,11 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                          S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
                          S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
                          S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
-       rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
-
-       if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-               memcpy(rstate->border_color, state->border_color.ui,
-                      sizeof(rstate->border_color));
-       }
-
+       rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
+                        S_008F3C_BORDER_COLOR_TYPE(border_color_type);
        return rstate;
 }
 
-/* Upload border colors and update the pointers in resource descriptors.
- * There can only be 4096 border colors per context.
- *
- * XXX: This is broken if the buffer gets reallocated.
- */
-static void si_set_border_colors(struct si_context *sctx, unsigned count,
-                                void **states)
-{
-       struct si_sampler_state **rstates = (struct si_sampler_state **)states;
-       uint32_t *border_color_table = NULL;
-       int i, j;
-
-       for (i = 0; i < count; i++) {
-               if (rstates[i] &&
-                   G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) ==
-                   V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) {
-                       if (!sctx->border_color_table ||
-                           ((sctx->border_color_offset + count - i) &
-                            C_008F3C_BORDER_COLOR_PTR)) {
-                               r600_resource_reference(&sctx->border_color_table, NULL);
-                               sctx->border_color_offset = 0;
-
-                               sctx->border_color_table =
-                                       si_resource_create_custom(&sctx->screen->b.b,
-                                                                 PIPE_USAGE_DYNAMIC,
-                                                                 4096 * 4 * 4);
-                       }
-
-                       if (!border_color_table) {
-                               border_color_table =
-                                       sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf,
-                                                            sctx->b.rings.gfx.cs,
-                                                            PIPE_TRANSFER_WRITE |
-                                                            PIPE_TRANSFER_UNSYNCHRONIZED);
-                       }
-
-                       for (j = 0; j < 4; j++) {
-                               border_color_table[4 * sctx->border_color_offset + j] =
-                                       util_le32_to_cpu(rstates[i]->border_color[j]);
-                       }
-
-                       rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR;
-                       rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++);
-               }
-       }
-
-       if (border_color_table) {
-               struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
-               uint64_t va_offset = sctx->border_color_table->gpu_address;
-
-               si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8);
-               if (sctx->b.chip_class >= CIK)
-                       si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40);
-               si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ,
-                             RADEON_PRIO_SHADER_DATA);
-               si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
-       }
-}
-
-static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
-                                   unsigned start, unsigned count,
-                                   void **states)
-{
-       struct si_context *sctx = (struct si_context *)ctx;
-
-       if (!count || shader >= SI_NUM_SHADERS)
-               return;
-
-       si_set_border_colors(sctx, count, states);
-       si_set_sampler_descriptors(sctx, shader, start, count, states);
-}
-
 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
        struct si_context *sctx = (struct si_context *)ctx;
@@ -3105,7 +3057,6 @@ void si_init_state_functions(struct si_context *sctx)
        sctx->b.b.get_sample_position = cayman_get_sample_position;
 
        sctx->b.b.create_sampler_state = si_create_sampler_state;
-       sctx->b.b.bind_sampler_states = si_bind_sampler_states;
        sctx->b.b.delete_sampler_state = si_delete_sampler_state;
 
        sctx->b.b.create_sampler_view = si_create_sampler_view;
@@ -3270,6 +3221,7 @@ static void si_init_config(struct si_context *sctx)
        unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
        unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
        unsigned raster_config, raster_config_1;
+       uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
        int i;
 
@@ -3434,5 +3386,11 @@ static void si_init_config(struct si_context *sctx)
                si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
        }
 
+       si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+       if (sctx->b.chip_class >= CIK)
+               si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
+       si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
+                     RADEON_PRIO_SHADER_DATA);
+
        sctx->init_config = pm4;
 }
index 49f9f65bc14c597cfe3cf929817a7d2df55d3de8..f5726f0c5dedb59d8b07449473bed6038d14a73d 100644 (file)
@@ -91,7 +91,6 @@ union si_state {
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
                struct si_pm4_state             *poly_offset;
-               struct si_pm4_state             *ta_bordercolor_base;
                struct si_pm4_state             *ls;
                struct si_pm4_state             *hs;
                struct si_pm4_state             *es;
@@ -246,8 +245,6 @@ struct si_buffer_resources {
        } while(0)
 
 /* si_descriptors.c */
-void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
-                               unsigned start, unsigned count, void **states);
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
                        struct pipe_resource *buffer,
                        unsigned stride, unsigned num_records,