r600: increase number of UBOs to 15
author Roland Scheidegger <sroland@vmware.com>
Wed, 3 Jan 2018 01:09:01 +0000 (02:09 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 10 Jan 2018 03:59:00 +0000 (04:59 +0100)
With the exception of the default tess levels only ever accessed
by the default tcs shader, the LDS_INFO const buffer was only accessed by vtx
instructions, and not through kcache. No idea why really, but use this to our
advantage by not using a constant buffer slot for it. This just requires us to
throw the default tess levels into the "normal" driver const buffer instead.
Alternatively, we could access those constants via vtx instructions too, but then
we couldn't use an ordinary ureg prog accessing them as constants and would have
to generate that directly when compiling the default tcs shader. (Another
alternative would be to put all lds info into the ordinary driver const
buffer, albeit we'd maybe need to increase the fixed size as it can't fit
alongside the ucp since vs needs access to the lds info too.)

Tested-by: Konstantin Kharlamov <hi-angel@yandex.ru>
Reviewed-by: Dave Airlie <airlied@redhat.com>

src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state_common.c

index f645791a2cbb286169ee842cc1f7e02b9fe15d7c..4cc48dfa11915cc15992f073631d2fb99ceb45e3 100644 (file)
@@ -2168,8 +2168,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
                va = rbuffer->gpu_address + cb->buffer_offset;
 
-               if (!gs_ring_buffer) {
-                       assert(buffer_index < R600_MAX_HW_CONST_BUFFERS);
+               if (buffer_index < R600_MAX_HW_CONST_BUFFERS) {
                        radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
                                                    DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
                        radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
@@ -3880,7 +3879,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx,
 
        memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
        memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
-       rctx->tess_state_dirty = true;
+       rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true;
 }
 
 static void evergreen_setup_immed_buffer(struct r600_context *rctx,
@@ -4344,7 +4343,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
        unsigned input_vertex_size, output_vertex_size;
        unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
        unsigned output_patch0_offset, perpatch_output_offset, lds_size;
-       uint32_t values[16];
+       uint32_t values[8];
        unsigned num_waves;
        unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
        unsigned wave_divisor = (16 * num_pipes);
@@ -4364,7 +4363,6 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 
        if (rctx->lds_alloc != 0 &&
            rctx->last_ls == ls &&
-           !rctx->tess_state_dirty &&
            rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
            rctx->last_tcs == tcs)
                return;
@@ -4411,17 +4409,12 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 
        rctx->lds_alloc = (lds_size | (num_waves << 14));
 
-       memcpy(&values[8], rctx->tess_state, 6 * sizeof(float));
-       values[14] = 0;
-       values[15] = 0;
-
-       rctx->tess_state_dirty = false;
        rctx->last_ls = ls;
        rctx->last_tcs = tcs;
        rctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
        constbuf.user_buffer = values;
-       constbuf.buffer_size = 16 * 4;
+       constbuf.buffer_size = 8 * 4;
 
        rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
                                      R600_LDS_INFO_CONST_BUFFER, &constbuf);
index cb84bc1998a1adbab90e028d253b3867b186c6c3..112b5cbb83e33ffd05f83e904146cc216fd0dfbf 100644 (file)
@@ -71,7 +71,7 @@
 
 #define EG_MAX_ATOMIC_BUFFERS 8
 
-#define R600_MAX_USER_CONST_BUFFERS 14
+#define R600_MAX_USER_CONST_BUFFERS 15
 #define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 #define R600_MAX_HW_CONST_BUFFERS 16
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_UCP_SIZE (4*4*8)
 #define R600_CS_BLOCK_GRID_SIZE (8 * 4)
+#define R600_TCS_DEFAULT_LEVELS_SIZE (6 * 4)
 #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
 
+/*
+ * We only access this buffer through vtx clauses hence it's fine to exist
+ * at index beyond 15.
+ */
 #define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 /*
  * Note GS doesn't use a constant buffer binding, just a resource index,
- * so it's fine to have it exist at index 16. I.e. it's not actually
+ * so it's fine to have it exist at index beyond 15. I.e. it's not actually
  * a const buffer, just a buffer resource.
  */
 #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
@@ -396,10 +401,11 @@ struct r600_shader_driver_constants_info {
        /* currently 128 bytes for UCP/samplepos + sampler buffer constants */
        uint32_t                        *constants;
        uint32_t                        alloc_size;
-       bool                            vs_ucp_dirty;
        bool                            texture_const_dirty;
+       bool                            vs_ucp_dirty;
        bool                            ps_sample_pos_dirty;
        bool                            cs_block_grid_size_dirty;
+       bool                            tcs_default_levels_dirty;
 };
 
 struct r600_constbuf_state
@@ -580,7 +586,6 @@ struct r600_context {
        float sample_positions[4 * 16];
        float tess_state[8];
        uint32_t cs_block_grid_sizes[8]; /* 3 for grid + 1 pad, 3 for block  + 1 pad*/
-       bool tess_state_dirty;
        struct r600_pipe_shader_selector *last_ls;
        struct r600_pipe_shader_selector *last_tcs;
        unsigned last_num_tcs_input_cp;
index b3b2fd09ef8bff2de47ba001f7bfa8187f89ae49..7f4d9f3e3341fce54df6a898a5aabead2bc56b18 100644 (file)
@@ -1230,6 +1230,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                if (!info->vs_ucp_dirty &&
                    !info->texture_const_dirty &&
                    !info->ps_sample_pos_dirty &&
+                   !info->tcs_default_levels_dirty &&
                    !info->cs_block_grid_size_dirty)
                        continue;
 
@@ -1246,7 +1247,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                        info->vs_ucp_dirty = false;
                }
 
-               if (info->ps_sample_pos_dirty) {
+               else if (info->ps_sample_pos_dirty) {
                        assert(sh == PIPE_SHADER_FRAGMENT);
                        if (!size) {
                                ptr = rctx->sample_positions;
@@ -1257,7 +1258,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                        info->ps_sample_pos_dirty = false;
                }
 
-               if (info->cs_block_grid_size_dirty) {
+               else if (info->cs_block_grid_size_dirty) {
                        assert(sh == PIPE_SHADER_COMPUTE);
                        if (!size) {
                                ptr = rctx->cs_block_grid_sizes;
@@ -1268,6 +1269,20 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                        info->cs_block_grid_size_dirty = false;
                }
 
+               else if (info->tcs_default_levels_dirty) {
+                       /*
+                        * We'd only really need this for default tcs shader.
+                        */
+                       assert(sh == PIPE_SHADER_TESS_CTRL);
+                       if (!size) {
+                               ptr = rctx->tess_state;
+                               size = R600_TCS_DEFAULT_LEVELS_SIZE;
+                       } else {
+                               memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE);
+                       }
+                       info->tcs_default_levels_dirty = false;
+               }
+
                if (info->texture_const_dirty) {
                        assert (ptr);
                        assert (size);
@@ -1277,6 +1292,8 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
                                memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
                        if (sh == PIPE_SHADER_COMPUTE)
                                memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE);
+                       if (sh == PIPE_SHADER_TESS_CTRL)
+                               memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE);
                }
                info->texture_const_dirty = false;
 
@@ -1521,11 +1538,11 @@ static void r600_generate_fixed_func_tcs(struct r600_context *rctx)
 
        assert(!rctx->fixed_func_tcs_shader);
 
-       ureg_DECL_constant2D(ureg, 0, 3, R600_LDS_INFO_CONST_BUFFER);
-       const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 2),
-                                   R600_LDS_INFO_CONST_BUFFER);
-       const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 3),
-                                   R600_LDS_INFO_CONST_BUFFER);
+       ureg_DECL_constant2D(ureg, 0, 1, R600_BUFFER_INFO_CONST_BUFFER);
+       const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+                                   R600_BUFFER_INFO_CONST_BUFFER);
+       const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1),
+                                   R600_BUFFER_INFO_CONST_BUFFER);
 
        tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
        tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);