radeonsi: use a single descriptor for the GSVS ring
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 29 Nov 2016 16:41:59 +0000 (17:41 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 12 Dec 2016 08:05:05 +0000 (09:05 +0100)
We can hardcode all of the fields for swizzling in the geometry shader.

The advantage is that we use fewer descriptor slots and we no longer have to
update any of the (ring) descriptors when the geometry shader changes.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 31b7985fef9c296933dfb69bfdd2dc5a91aa45f3..9f79c2aa8046e2870c65ce34ce64394a38b962d7 100644 (file)
@@ -327,7 +327,6 @@ struct si_context {
        int                     last_vtx_reuse_depth;
        int                     current_rast_prim; /* primitive type after TES, GS */
        bool                    gs_tri_strip_adj_fix;
-       unsigned                last_gsvs_itemsize;
 
        /* Scratch buffer */
        struct r600_resource    *scratch_buffer;
index 48ccd83b396a2d15fa1da537cfa951817170d89f..9b495925a6e6616d9eb31d822790296810672cb7 100644 (file)
@@ -5817,6 +5817,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm =
                ctx->soa.bld_base.base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
 
        LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
                                            SI_PARAM_RW_BUFFERS);
@@ -5836,18 +5837,74 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
        }
 
        if (ctx->shader->is_gs_copy_shader) {
-               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS);
+               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
 
                ctx->gsvs_ring[0] =
                        build_indexed_load_const(ctx, buf_ptr, offset);
-       }
-       if (ctx->type == PIPE_SHADER_GEOMETRY) {
-               int i;
-               for (i = 0; i < 4; i++) {
-                       LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i);
+       } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
+               struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+               LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
+               LLVMValueRef base_ring;
+
+               base_ring = build_indexed_load_const(ctx, buf_ptr, offset);
+
+               /* The conceptual layout of the GSVS ring is
+                *   v0c0 .. vLc0 v0c1 .. vLc1 ..
+                * but the real memory layout is swizzled across
+                * threads:
+                *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+                *   t16v0c0 ..
+                * Override the buffer descriptor accordingly.
+                */
+               LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
+               unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size;
+               unsigned num_records;
+
+               num_records = 64;
+               if (ctx->screen->b.chip_class >= VI)
+                       num_records *= max_gsvs_emit_size;
+
+               for (unsigned stream = 0; stream < 4; ++stream) {
+                       LLVMValueRef ring, tmp;
+
+                       if (!ctx->shader->selector->info.num_stream_output_components[stream])
+                               continue;
 
-                       ctx->gsvs_ring[i] =
-                               build_indexed_load_const(ctx, buf_ptr, offset);
+                       /* Limit on the stride field for <= CIK. */
+                       assert(max_gsvs_emit_size < (1 << 14));
+
+                       ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+                       tmp = LLVMBuildAdd(builder, tmp,
+                                          LLVMConstInt(ctx->i64,
+                                                       max_gsvs_emit_size * 64 * stream, 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+                       ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+                       tmp = LLVMBuildOr(builder, tmp,
+                               LLVMConstInt(ctx->i32,
+                                            S_008F04_STRIDE(max_gsvs_emit_size) |
+                                            S_008F04_SWIZZLE_ENABLE(1), 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+                       ring = LLVMBuildInsertElement(builder, ring,
+                                       LLVMConstInt(ctx->i32, num_records, 0),
+                                       LLVMConstInt(ctx->i32, 2, 0), "");
+                       ring = LLVMBuildInsertElement(builder, ring,
+                               LLVMConstInt(ctx->i32,
+                                            S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                                            S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                                            S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                                            S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                                            S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                            S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                            S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
+                                            S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+                                            S_008F0C_ADD_TID_ENABLE(1),
+                                            0),
+                               LLVMConstInt(ctx->i32, 3, 0), "");
+                       ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
+
+                       ctx->gsvs_ring[stream] = ring;
                }
        }
 }
index d8e60249db2d20dadbe25b23bad65e4567ed414e..a17dbc73102ab8d005c81be21342e5b32cb770e8 100644 (file)
@@ -167,11 +167,7 @@ enum {
        SI_ES_RING_ESGS,
        SI_GS_RING_ESGS,
 
-       SI_GS_RING_GSVS0,
-       SI_GS_RING_GSVS1,
-       SI_GS_RING_GSVS2,
-       SI_GS_RING_GSVS3,
-       SI_VS_RING_GSVS,
+       SI_RING_GSVS,
 
        SI_VS_STREAMOUT_BUF0,
        SI_VS_STREAMOUT_BUF1,
index ea715695b97fe2662eee9b4d9c1548ec5a84d8b7..1e9f5f0a217b3d04261fa3ef88e949942104d3e5 100644 (file)
@@ -2039,47 +2039,14 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
                                   false, false, 0, 0, 0);
        }
        if (sctx->gsvs_ring) {
-               si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
+               si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS,
                                   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
                                   false, false, 0, 0, 0);
-
-               /* Also update SI_GS_RING_GSVSi descriptors. */
-               sctx->last_gsvs_itemsize = 0;
        }
 
        return true;
 }
 
-static void si_update_gsvs_ring_bindings(struct si_context *sctx)
-{
-       unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
-       uint64_t offset;
-
-       if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
-               return;
-
-       sctx->last_gsvs_itemsize = gsvs_itemsize;
-
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, 0);
-
-       offset = gsvs_itemsize * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-
-       offset = (gsvs_itemsize * 2) * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-
-       offset = (gsvs_itemsize * 3) * 64;
-       si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3,
-                          sctx->gsvs_ring, gsvs_itemsize,
-                          64, true, true, 4, 16, offset);
-}
-
 /**
  * @returns 1 if \p sel has been updated to use a new scratch buffer
  *          0 if not
@@ -2469,8 +2436,6 @@ bool si_update_shaders(struct si_context *sctx)
 
                if (!si_update_gs_ring_buffers(sctx))
                        return false;
-
-               si_update_gsvs_ring_bindings(sctx);
        } else {
                si_pm4_bind_state(sctx, gs, NULL);
                si_pm4_bind_state(sctx, es, NULL);