radeonsi: Add buffer for offchip storage between TCS and TES.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 2 May 2016 07:54:11 +0000 (09:54 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)
The buffer is quite large, but should only be allocated if the
application uses tessellation. Most non-games don't.

v2: - Use the correct register for SI.
    - Add define for block size.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 6700590dfacf903f9ed3910838033315e152e8de..eefc68a7f81c8bcf050060e91d6eb9e48a6e2174 100644 (file)
@@ -48,6 +48,7 @@ static void si_destroy_context(struct pipe_context *context)
        pipe_resource_reference(&sctx->esgs_ring, NULL);
        pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->tf_ring, NULL);
+       pipe_resource_reference(&sctx->tess_offchip_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
        r600_resource_reference(&sctx->border_color_buffer, NULL);
        free(sctx->border_color_table);
index 33d3d259e2ac9f4b774992f68151fe037675316b..e5b88c71dfdd4be940ce3290a7de743fcaefa281 100644 (file)
@@ -256,6 +256,7 @@ struct si_context {
        struct pipe_resource            *esgs_ring;
        struct pipe_resource            *gsvs_ring;
        struct pipe_resource            *tf_ring;
+       struct pipe_resource            *tess_offchip_ring;
        union pipe_color_union          *border_color_table; /* in CPU memory, any endian */
        struct r600_resource            *border_color_buffer;
        union pipe_color_union          *border_color_map; /* in VRAM (slow access), little endian */
index f2a3b037a2ceb4589bc7e639473ca5e3b4cd6272..a3589d4611d0b35ca39f6583159c104f0d2282d8 100644 (file)
@@ -40,6 +40,8 @@
 #define SI_NUM_IMAGES                  16
 #define SI_NUM_SHADER_BUFFERS          16
 
+#define SI_TESS_OFFCHIP_BLOCK_SIZE     (8192 * 4)
+
 struct si_screen;
 struct si_shader;
 
@@ -155,6 +157,7 @@ struct si_shader_data {
 /* Private read-write buffer slots. */
 enum {
        SI_HS_RING_TESS_FACTOR,
+       SI_HS_RING_TESS_OFFCHIP,
 
        SI_ES_RING_ESGS,
        SI_GS_RING_ESGS,
index 13066ff3c7dffdf583291409da6294419ce1a2cc..d8ae2b232e9e21804bafabe45122ab6f3309f146 100644 (file)
@@ -1770,6 +1770,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
+       unsigned offchip_blocks = sctx->b.chip_class >= CIK ? 256 : 64;
        assert(!sctx->tf_ring);
 
        sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
@@ -1780,6 +1781,14 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
        assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
 
+       sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen,
+                                                    PIPE_BIND_CUSTOM,
+                                                    PIPE_USAGE_DEFAULT,
+                                                    offchip_blocks *
+                                                    SI_TESS_OFFCHIP_BLOCK_SIZE);
+       if (!sctx->tess_offchip_ring)
+               return;
+
        si_init_config_add_vgt_flush(sctx);
 
        /* Append these registers to the init config state. */
@@ -1788,11 +1797,16 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
                               S_030938_SIZE(sctx->tf_ring->width0 / 4));
                si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
                               r600_resource(sctx->tf_ring)->gpu_address >> 8);
+               si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
+                            S_03093C_OFFCHIP_BUFFERING(offchip_blocks - 1) |
+                            S_03093C_OFFCHIP_GRANULARITY(V_03093C_X_8K_DWORDS));
        } else {
                si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
                               S_008988_SIZE(sctx->tf_ring->width0 / 4));
                si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
                               r600_resource(sctx->tf_ring)->gpu_address >> 8);
+               si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
+                              S_0089B0_OFFCHIP_BUFFERING(offchip_blocks - 1));
        }
 
        /* Flush the context to re-emit the init_config state.
@@ -1804,6 +1818,10 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 
        si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_FACTOR, sctx->tf_ring,
                           0, sctx->tf_ring->width0, false, false, 0, 0, 0);
+
+       si_set_ring_buffer(&sctx->b.b, SI_HS_RING_TESS_OFFCHIP,
+                          sctx->tess_offchip_ring, 0,
+                          sctx->tess_offchip_ring->width0, false, false, 0, 0, 0);
 }
 
 /**