r600g: lower number of driver const buffers
author Dave Airlie <airlied@redhat.com>
Fri, 11 Sep 2015 03:43:53 +0000 (04:43 +0100)
committer Dave Airlie <airlied@redhat.com>
Sat, 12 Sep 2015 05:56:58 +0000 (06:56 +0100)
I'm going to want a driver constant buffer for tessellation to coordinate
LDS storage, so before tackling that I decided to merge the
clip/samplepos and texture info buffers into one, which frees up a
driver constant buffer slot that I can steal for it.

This creates a single constant buffer shared by the two, with
clip/samplepos taking up a reserved 128 bytes at the start and the
texture/buffer info constants following at that offset.

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state_common.c

index 25df831339cef01f57f639f21d2f2b6f7630ebc3..d0774de857366c8509f55991424e1825f81c0aa5 100644 (file)
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 2
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
-#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
+#define R600_UCP_SIZE (4*4*8)
+#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
+
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -77,8 +79,6 @@
  * In order to support d3d 11 mandated minimum of 15 user const buffers
  * we'd have to squash all use cases into one driver buffer.
  */
-#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-
 #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -356,11 +356,15 @@ struct r600_textures_info {
        struct r600_samplerview_state   views;
        struct r600_sampler_states      states;
        bool                            is_array_sampler[NUM_TEX_UNITS];
+};
 
-       /* cube array txq workaround */
-       uint32_t                        *txq_constants;
-       /* buffer related workarounds */
-       uint32_t                        *buffer_constants;
+struct r600_shader_driver_constants_info {
+       /* currently 128 bytes for UCP/samplepos + sampler buffer constants */
+       uint32_t                        *constants;
+       uint32_t                        alloc_size;
+       bool                            vs_ucp_dirty;
+       bool                            texture_const_dirty;
+       bool                            ps_sample_pos_dirty;
 };
 
 struct r600_constbuf_state
@@ -472,6 +476,9 @@ struct r600_context {
        struct r600_gs_rings_state      gs_rings;
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];
+
+       struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES];
+
        /** Vertex buffers for fetch shaders */
        struct r600_vertexbuf_state     vertex_buffer_state;
        /** Vertex buffers for compute shaders */
@@ -498,6 +505,7 @@ struct r600_context {
 
        void                            *sb_context;
        struct r600_isa         *isa;
+       float sample_positions[4 * 16];
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
index f2c9e169f743a2f0a0f1544c9dd096668821037d..93b1bf7d5b447c1aa3f44597069dc5eb2d8b6d81 100644 (file)
@@ -60,6 +60,7 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
 static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 union r600_shader_key key);
@@ -947,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
 
        memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
        vtx.op = FETCH_OP_VFETCH;
-       vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+       vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
        vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
        if (sample_id == NULL) {
                vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
@@ -2307,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                                alu.src[0].chan = j;
 
                                alu.src[1].sel = 512 + i;
-                               alu.src[1].kc_bank = R600_UCP_CONST_BUFFER;
+                               alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
                                alu.src[1].chan = j;
 
                                alu.dst.sel = clipdist_temp[oreg];
@@ -5499,7 +5500,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
                alu.src[0].sel = vtx.dst_gpr;
                alu.src[0].chan = i;
 
-               alu.src[1].sel = 512 + (id * 2);
+               alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
+               alu.src[1].sel += (id * 2);
                alu.src[1].chan = i % 4;
                alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5521,7 +5523,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
                alu.src[0].sel = vtx.dst_gpr;
                alu.src[0].chan = 3;
 
-               alu.src[1].sel = 512 + (id * 2) + 1;
+               alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
                alu.src[1].chan = 0;
                alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
 
@@ -5542,14 +5544,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
 
        memset(&alu, 0, sizeof(struct r600_bytecode_alu));
        alu.op = ALU_OP1_MOV;
-
+       alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
        if (ctx->bc->chip_class >= EVERGREEN) {
                /* channel 0 or 2 of each word */
-               alu.src[0].sel = 512 + (id / 2);
+               alu.src[0].sel += (id / 2);
                alu.src[0].chan = (id % 2) * 2;
        } else {
                /* r600 we have them at channel 2 of the second dword */
-               alu.src[0].sel = 512 + (id * 2) + 1;
+               alu.src[0].sel += (id * 2) + 1;
                alu.src[0].chan = 1;
        }
        alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
@@ -6207,13 +6209,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                alu.op = ALU_OP1_MOV;
 
+               alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
                if (ctx->bc->chip_class >= EVERGREEN) {
                        /* channel 1 or 3 of each word */
-                       alu.src[0].sel = 512 + (id / 2);
+                       alu.src[0].sel += (id / 2);
                        alu.src[0].chan = ((id % 2) * 2) + 1;
                } else {
                        /* r600 we have them at channel 2 of the second dword */
-                       alu.src[0].sel = 512 + (id * 2) + 1;
+                       alu.src[0].sel += (id * 2) + 1;
                        alu.src[0].chan = 2;
                }
                alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
index ae1341187cb9e237154c92c6258d596514fccd01..21c89dc0b6150ec9029191fe999c2add45571630 100644 (file)
@@ -240,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx,
                                const struct pipe_clip_state *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct pipe_constant_buffer cb;
 
        rctx->clip_state.state = *state;
        r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
-
-       cb.buffer = NULL;
-       cb.user_buffer = state->ucp;
-       cb.buffer_offset = 0;
-       cb.buffer_size = 4*4*8;
-       ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
+       rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true;
 }
 
 static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -1053,6 +1046,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
        r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
 }
 
+static void r600_update_driver_const_buffers(struct r600_context *rctx)
+{
+       int sh, size;;
+       void *ptr;
+       struct pipe_constant_buffer cb;
+       for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+               struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
+               if (!info->vs_ucp_dirty &&
+                   !info->texture_const_dirty &&
+                   !info->ps_sample_pos_dirty)
+                       continue;
+
+               ptr = info->constants;
+               size = info->alloc_size;
+               if (info->vs_ucp_dirty) {
+                       assert(sh == PIPE_SHADER_VERTEX);
+                       if (!size) {
+                               ptr = rctx->clip_state.state.ucp;
+                               size = R600_UCP_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+                       }
+                       info->vs_ucp_dirty = false;
+               }
+
+               if (info->ps_sample_pos_dirty) {
+                       assert(sh == PIPE_SHADER_FRAGMENT);
+                       if (!size) {
+                               ptr = rctx->sample_positions;
+                               size = R600_UCP_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+                       }
+                       info->ps_sample_pos_dirty = false;
+               }
+
+               if (info->texture_const_dirty) {
+                       assert (ptr);
+                       assert (size);
+                       if (sh == PIPE_SHADER_VERTEX)
+                               memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
+                       if (sh == PIPE_SHADER_FRAGMENT)
+                               memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
+               }
+               info->texture_const_dirty = false;
+
+               cb.buffer = NULL;
+               cb.user_buffer = ptr;
+               cb.buffer_offset = 0;
+               cb.buffer_size = size;
+               rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+               pipe_resource_reference(&cb.buffer, NULL);
+       }
+}
+
+static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type,
+                                  int array_size, uint32_t *base_offset)
+{
+       struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type];
+       if (array_size + R600_UCP_SIZE > info->alloc_size) {
+               info->constants = realloc(info->constants, array_size + R600_UCP_SIZE);
+               info->alloc_size = array_size + R600_UCP_SIZE;
+       }
+       memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size);
+       info->texture_const_dirty = true;
+       *base_offset = R600_UCP_SIZE;
+       return info->constants;
+}
 /*
  * On r600/700 hw we don't have vertex fetch swizzle, though TBO
  * doesn't require full swizzles it does need masking and setting alpha
@@ -1067,9 +1128,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
        struct r600_textures_info *samplers = &rctx->samplers[shader_type];
        int bits;
        uint32_t array_size;
-       struct pipe_constant_buffer cb;
        int i, j;
-
+       uint32_t *constants;
+       uint32_t base_offset;
        if (!samplers->views.dirty_buffer_constants)
                return;
 
@@ -1077,38 +1138,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
 
        bits = util_last_bit(samplers->views.enabled_mask);
        array_size = bits * 8 * sizeof(uint32_t) * 4;
-       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-       memset(samplers->buffer_constants, 0, array_size);
+
+       constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);
+
        for (i = 0; i < bits; i++) {
                if (samplers->views.enabled_mask & (1 << i)) {
-                       int offset = i * 8;
+                       int offset = (base_offset / 4) + i * 8;
                        const struct util_format_description *desc;
                        desc = util_format_description(samplers->views.views[i]->base.format);
 
                        for (j = 0; j < 4; j++)
                                if (j < desc->nr_channels)
-                                       samplers->buffer_constants[offset+j] = 0xffffffff;
+                                       constants[offset+j] = 0xffffffff;
                                else
-                                       samplers->buffer_constants[offset+j] = 0x0;
+                                       constants[offset+j] = 0x0;
                        if (desc->nr_channels < 4) {
                                if (desc->channel[0].pure_integer)
-                                       samplers->buffer_constants[offset+4] = 1;
+                                       constants[offset+4] = 1;
                                else
-                                       samplers->buffer_constants[offset+4] = fui(1.0);
+                                       constants[offset+4] = fui(1.0);
                        } else
-                               samplers->buffer_constants[offset + 4] = 0;
+                               constants[offset + 4] = 0;
 
-                       samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-                       samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
+                       constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+                       constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
                }
        }
 
-       cb.buffer = NULL;
-       cb.user_buffer = samplers->buffer_constants;
-       cb.buffer_offset = 0;
-       cb.buffer_size = array_size;
-       rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* On evergreen we store two values
@@ -1120,9 +1176,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
        struct r600_textures_info *samplers = &rctx->samplers[shader_type];
        int bits;
        uint32_t array_size;
-       struct pipe_constant_buffer cb;
        int i;
-
+       uint32_t *constants;
+       uint32_t base_offset;
        if (!samplers->views.dirty_buffer_constants)
                return;
 
@@ -1130,45 +1186,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
 
        bits = util_last_bit(samplers->views.enabled_mask);
        array_size = bits * 2 * sizeof(uint32_t) * 4;
-       samplers->buffer_constants = realloc(samplers->buffer_constants, array_size);
-       memset(samplers->buffer_constants, 0, array_size);
+
+       constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
+                                         &base_offset);
+
        for (i = 0; i < bits; i++) {
                if (samplers->views.enabled_mask & (1 << i)) {
-                       uint32_t offset = i * 2;
-                       samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
-                       samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+                       uint32_t offset = (base_offset / 4) + i * 2;
+                       constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+                       constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
                }
        }
-
-       cb.buffer = NULL;
-       cb.user_buffer = samplers->buffer_constants;
-       cb.buffer_offset = 0;
-       cb.buffer_size = array_size;
-       rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb);
-       pipe_resource_reference(&cb.buffer, NULL);
 }
 
 /* set sample xy locations as array of fragment shader constants */
 void r600_set_sample_locations_constant_buffer(struct r600_context *rctx)
 {
-       struct pipe_constant_buffer constbuf = {0};
-       float values[4*16] = {0.0f};
        int i;
        struct pipe_context *ctx = &rctx->b.b;
 
-       assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
+       assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE);
+       assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4);
+
+       memset(rctx->sample_positions, 0, 4 * 4 * 16);
        for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
-               ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]);
+               ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]);
                /* Also fill in center-zeroed positions used for interpolateAtSample */
-               values[4*i + 2] = values[4*i + 0] - 0.5f;
-               values[4*i + 3] = values[4*i + 1] - 0.5f;
+               rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f;
+               rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f;
        }
 
-       constbuf.user_buffer = values;
-       constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
-       ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
-               R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf);
-       pipe_resource_reference(&constbuf.buffer, NULL);
+       rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true;
 }
 
 static void update_shader_atom(struct pipe_context *ctx,
@@ -1387,6 +1435,8 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                }
        }
 
+       r600_update_driver_const_buffers(rctx);
+
        if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
                if (!r600_adjust_gprs(rctx)) {
                        /* discard rendering */