r600: create LDS info constants buffer and write LDS registers. (v2)
author Dave Airlie <airlied@redhat.com>
Mon, 30 Nov 2015 04:56:10 +0000 (14:56 +1000)
committer Dave Airlie <airlied@redhat.com>
Sun, 6 Dec 2015 23:59:00 +0000 (09:59 +1000)
This creates a constant buffer with the information about
the layout of the LDS memory that is given to the vertex, tess
control and tess evaluation shaders.

This also programs the LDS size and the LS_HS_CONFIG registers,
on evergreen only.

v2: calculate lds hs num waves properly (Marek)
Emit the state only when something has changed (airlied).

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state_common.c

index bd68503f9c654e8aee8bc120811f561cced79def..79cdd7c2cdafee605d2c3ca370af85c15617e760 100644 (file)
@@ -3677,6 +3677,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx,
 
        memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
        memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
+       rctx->tess_state_dirty = true;
 }
 
 void evergreen_init_state_functions(struct r600_context *rctx)
@@ -3770,3 +3771,153 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 
        evergreen_init_compute_state_functions(rctx);
 }
+
+/**
+ * This calculates the LDS layout for the tessellation stages (VS acting as
+ * LS, TCS, TES) and publishes it to the shaders through a constant buffer.
+ *
+ * The information about LDS and other non-compile-time parameters is
+ * written to the const buffer. All sizes and offsets are in bytes, and
+ * every input/output slot is counted as 16 bytes.
+ *
+ * The const buffer holds 16 dwords:
+ *   [0]      uint32_t input_patch_size
+ *   [1]      uint32_t input_vertex_size
+ *   [2]      uint32_t num_tcs_input_cp
+ *   [3]      uint32_t num_tcs_output_cp
+ *   [4]      uint32_t output_patch_size
+ *   [5]      uint32_t output_vertex_size
+ *   [6]      uint32_t output_patch0_offset
+ *   [7]      uint32_t perpatch_output_offset
+ *   [8..13]  the six floats of rctx->tess_state
+ *   [14..15] zero
+ * and the same constbuf is bound at R600_LDS_INFO_CONST_BUFFER for the
+ * vertex, tess control and tess evaluation stages.
+ *
+ * rctx        - context; lds_alloc is updated (0 when no TES is bound), and
+ *               tess_state_dirty, last_ls, last_tcs and
+ *               last_num_tcs_input_cp record what the buffer was built
+ *               from so that an unchanged draw can return early.
+ * info        - draw info; only vertices_per_patch is read.
+ * num_patches - output; always set to 1 by this function.
+ */
+void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
+{
+       struct pipe_constant_buffer constbuf = {0};
+       struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader; /* TES selector stands in when no TCS is bound */
+       struct r600_pipe_shader_selector *ls = rctx->vs_shader; /* the VS is treated as the LS */
+       unsigned num_tcs_input_cp = info->vertices_per_patch;
+       unsigned num_tcs_outputs;
+       unsigned num_tcs_output_cp;
+       unsigned num_tcs_patch_outputs;
+       unsigned num_tcs_inputs;
+       unsigned input_vertex_size, output_vertex_size;
+       unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
+       unsigned output_patch0_offset, perpatch_output_offset, lds_size;
+       uint32_t values[16];
+       unsigned num_waves;
+       unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+       unsigned wave_divisor = (16 * num_pipes); /* denominator of the HS_NUM_WAVES formula below */
+
+       *num_patches = 1; /* assigned before every return path */
+
+       if (!rctx->tes_shader) { /* no TES bound: clear lds_alloc and unbind the LDS info buffer */
+               rctx->lds_alloc = 0;
+               rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+                                             R600_LDS_INFO_CONST_BUFFER, NULL);
+               rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
+                                             R600_LDS_INFO_CONST_BUFFER, NULL);
+               rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
+                                             R600_LDS_INFO_CONST_BUFFER, NULL);
+               return;
+       }
+
+       if (rctx->lds_alloc != 0 && /* inputs match the last upload: keep what is already bound */
+           rctx->last_ls == ls &&
+           !rctx->tess_state_dirty &&
+           rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
+           rctx->last_tcs == tcs)
+               return;
+
+       num_tcs_inputs = util_last_bit64(ls->lds_outputs_written_mask); /* TCS input count is derived from the LS output mask */
+
+       if (rctx->tcs_shader) {
+               num_tcs_outputs = util_last_bit64(tcs->lds_outputs_written_mask);
+               num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+               num_tcs_patch_outputs = util_last_bit64(tcs->lds_patch_outputs_written_mask);
+       } else { /* no TCS: per-vertex outputs mirror the inputs */
+               num_tcs_outputs = num_tcs_inputs;
+               num_tcs_output_cp = num_tcs_input_cp;
+               num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+       }
+
+       /* size in bytes */
+       input_vertex_size = num_tcs_inputs * 16;
+       output_vertex_size = num_tcs_outputs * 16;
+
+       input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+       pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+       output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+       output_patch0_offset = rctx->tcs_shader ? input_patch_size * *num_patches : 0; /* with a TCS the outputs follow all input patches; without one they start at 0 */
+       perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+       lds_size = output_patch0_offset + output_patch_size * *num_patches;
+
+       values[0] = input_patch_size;
+       values[1] = input_vertex_size;
+       values[2] = num_tcs_input_cp;
+       values[3] = num_tcs_output_cp;
+
+       values[4] = output_patch_size;
+       values[5] = output_vertex_size;
+       values[6] = output_patch0_offset;
+       values[7] = perpatch_output_offset;
+
+       /* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
+          LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
+       num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);
+
+       rctx->lds_alloc = (lds_size | (num_waves << 14)); /* byte size in the low bits, wave count from bit 14 up; the draw path writes this to SQ_LDS_ALLOC */
+
+       memcpy(&values[8], rctx->tess_state, 6 * sizeof(float)); /* 4 default outer levels followed by 2 default inner levels */
+       values[14] = 0;
+       values[15] = 0;
+
+       rctx->tess_state_dirty = false; /* remember what this upload was built from for the early-return check */
+       rctx->last_ls = ls;
+       rctx->last_tcs = tcs;
+       rctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+       constbuf.user_buffer = values;
+       constbuf.buffer_size = 16 * 4; /* 16 dwords, i.e. all of values[] */
+
+       rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+                                     R600_LDS_INFO_CONST_BUFFER, &constbuf);
+       rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
+                                     R600_LDS_INFO_CONST_BUFFER, &constbuf);
+       rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
+                                     R600_LDS_INFO_CONST_BUFFER, &constbuf);
+       pipe_resource_reference(&constbuf.buffer, NULL); /* NOTE(review): constbuf.buffer is zero-initialised and never assigned in this function, so this looks like a no-op unless set_constant_buffer fills it in - confirm */
+}
+/**
+ * Build the value for the VGT_LS_HS_CONFIG register for one draw.
+ *
+ * Returns 0 when no tessellation evaluation shader is bound. Otherwise the
+ * result packs num_patches, the input control point count
+ * (info->vertices_per_patch) and the output control point count. The output
+ * count is the TCS's TGSI_PROPERTY_TCS_VERTICES_OUT property when a TCS is
+ * bound, and equals the input count when it is not.
+ */
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+                                   const struct pipe_draw_info *info,
+                                   unsigned num_patches)
+{
+       unsigned num_output_cp;
+
+       if (!rctx->tes_shader)
+               return 0;
+
+       num_output_cp = rctx->tcs_shader ?
+               rctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+               info->vertices_per_patch;
+
+       return S_028B58_NUM_PATCHES(num_patches) |
+               S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+               S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
+}
+/**
+ * Write ls_hs_config to the VGT_LS_HS_CONFIG context register through cs.
+ * The rctx argument is not referenced by the body.
+ */
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+                               struct radeon_winsys_cs *cs,
+                               uint32_t ls_hs_config)
+{
+       radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+}
+/**
+ * Write lds_alloc to the SQ_LDS_ALLOC context register through cs.
+ * The rctx argument is not referenced by the body.
+ */
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+                            struct radeon_winsys_cs *cs,
+                            uint32_t lds_alloc)
+{
+       radeon_set_context_reg(cs, R_0288E8_SQ_LDS_ALLOC, lds_alloc);
+}
index ac06d1f51b6a39610442e8b896a6b432e420321a..98dc6fc3d01dab68eeb535f62fb08e3185564b72 100644 (file)
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS       16
-#define R600_MAX_DRAW_CS_DWORDS                52
+#define R600_MAX_DRAW_CS_DWORDS                58
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 2
+#define R600_MAX_DRIVER_CONST_BUFFERS 3
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
 #define R600_UCP_SIZE (4*4*8)
 #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
 
-#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+/*
+ * Note GS doesn't use a constant buffer binding, just a resource index,
+ * so it's fine to have it exist at index 16.
+ */
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
 /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
  * of 16 const buffers.
  * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
@@ -525,6 +530,11 @@ struct r600_context {
        struct r600_isa         *isa;
        float sample_positions[4 * 16];
        float tess_state[8];
+       bool tess_state_dirty;
+       struct r600_pipe_shader_selector *last_ls;
+       struct r600_pipe_shader_selector *last_tcs;
+       unsigned last_num_tcs_input_cp;
+       unsigned lds_alloc;
 };
 
 static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -702,6 +712,18 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
                               uint64_t dst_offset,
                               uint64_t src_offset,
                               uint64_t size);
+void evergreen_setup_tess_constants(struct r600_context *rctx,
+                                   const struct pipe_draw_info *info,
+                                   unsigned *num_patches);
+uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
+                                   const struct pipe_draw_info *info,
+                                   unsigned num_patches);
+void evergreen_set_ls_hs_config(struct r600_context *rctx,
+                               struct radeon_winsys_cs *cs,
+                               uint32_t ls_hs_config);
+void evergreen_set_lds_alloc(struct r600_context *rctx,
+                            struct radeon_winsys_cs *cs,
+                            uint32_t lds_alloc);
 
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
index de90a99ac1a6e08c9a675edd637b7d8d7fecba1f..7cc5adcb2b8d16c426b2153ea2611415faee95f8 100644 (file)
@@ -1612,6 +1612,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
        struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
        bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
        uint64_t mask;
+       unsigned num_patches;
 
        if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
                return;
@@ -1717,6 +1718,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
        }
 
+       if (rctx->b.chip_class >= EVERGREEN)
+               evergreen_setup_tess_constants(rctx, &info, &num_patches);
+
        /* Emit states. */
        r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
        r600_flush_emit(rctx);
@@ -1750,6 +1754,14 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                                       S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1));
        }
 
+       if (rctx->b.chip_class >= EVERGREEN) {
+               uint32_t ls_hs_config = evergreen_get_ls_hs_config(rctx, &info,
+                                                                  num_patches);
+
+               evergreen_set_ls_hs_config(rctx, cs, ls_hs_config);
+               evergreen_set_lds_alloc(rctx, cs, rctx->lds_alloc);
+       }
+
        /* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
         * even though it should have no effect on those. */
        if (rctx->b.chip_class == R600 && rctx->rasterizer) {