gallium/swr: Enable GL_ARB_gpu_shader5: multiple streams
author Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
Tue, 29 Oct 2019 14:50:02 +0000 (14:50 +0000)
committer Jan Zielinski <jan.zielinski@intel.com>
Tue, 29 Oct 2019 14:50:02 +0000 (14:50 +0000)
Added support for multiple geometry shader streams (part of the
GL_ARB_gpu_shader5 extension).

Reviewed-by: Jan Zielinski <jan.zielinski@intel.com>
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/swr_screen.cpp
src/gallium/drivers/swr/swr_shader.cpp

index 25b0f0f07b5759511fe22bc1bd0970111a651b2b..c15722cc3e73935aadb721598aae44a6e0ffae02 100644 (file)
@@ -1521,7 +1521,8 @@ static void
 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                          struct lp_build_context * bld,
                          LLVMValueRef (*outputs)[4],
-                         LLVMValueRef emitted_vertices_vec)
+                         LLVMValueRef emitted_vertices_vec,
+                         LLVMValueRef stream_id)
 {
    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
    struct draw_gs_llvm_variant *variant = gs_iface->variant;
index 940a4c49a5009f43fcd35b189b3254a06eff8d8c..4bd0c0cf2af1ebe34e08ca6c3a86b2e0670baa15 100644 (file)
@@ -446,7 +446,8 @@ struct lp_build_gs_iface
    void (*emit_vertex)(const struct lp_build_gs_iface *gs_iface,
                        struct lp_build_context * bld,
                        LLVMValueRef (*outputs)[4],
-                       LLVMValueRef emitted_vertices_vec);
+                       LLVMValueRef emitted_vertices_vec,
+                       LLVMValueRef stream_id);
    void (*end_primitive)(const struct lp_build_gs_iface *gs_iface,
                          struct lp_build_context * bld,
                          LLVMValueRef total_emitted_vertices_vec,
index 8066d59ee67dbd7e6c1a9d2f6979f7562ace915b..5a67f834c90a7742c3640311cb770397c6fcb833 100644 (file)
@@ -3978,6 +3978,8 @@ emit_vertex(
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
 
    if (bld->gs_iface->emit_vertex) {
+      uint32_t imms_idx = emit_data->inst->Src[0].Register.SwizzleX;
+      LLVMValueRef stream_id = bld->immediates[0][imms_idx];
       LLVMValueRef mask = mask_vec(bld_base);
       LLVMValueRef total_emitted_vertices_vec =
          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
@@ -3986,7 +3988,8 @@ emit_vertex(
       gather_outputs(bld);
       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                                  bld->outputs,
-                                 total_emitted_vertices_vec);
+                                 total_emitted_vertices_vec,
+                                 stream_id);
       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                 mask);
       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
index 13e92e8640a4b2b8ef571a3549f5d3d339348790..ab079ab4aa02751831d9bd1ff154c9305ababa1c 100644 (file)
@@ -702,8 +702,8 @@ void ProcessStreamIdBuffer(uint32_t stream,
 {
     SWR_ASSERT(stream < MAX_SO_STREAMS);
 
-    uint32_t numInputBytes  = (numEmittedVerts * 2 + 7) / 8;
-    uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U);
+    uint32_t numInputBytes  = AlignUp(numEmittedVerts * 2, 8) / 8;
+    uint32_t numOutputBytes = AlignUp(numEmittedVerts, 8) / 8;
 
     for (uint32_t b = 0; b < numOutputBytes; ++b)
     {
index 030b62a15ae8c88c8d0968336f683e0e150fbe40..6c596a463b4e2c15d271ab4777e37961f9db77b3 100644 (file)
@@ -191,7 +191,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
       return 1024;
    case PIPE_CAP_MAX_VERTEX_STREAMS:
-      return 1;
+      return 4;
    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
       return 2048;
    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
index c8e34b8adb8a8b54334069fbc2a19ced56742dea..e5e5411fb10f043c368e26ac452c8c838d67549a 100644 (file)
@@ -251,7 +251,8 @@ struct BuilderSWR : public Builder {
    swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                            struct lp_build_context * bld,
                            LLVMValueRef (*outputs)[4],
-                           LLVMValueRef emitted_vertices_vec);
+                           LLVMValueRef emitted_vertices_vec,
+                           LLVMValueRef stream_id);
 
    void
    swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
@@ -306,13 +307,15 @@ static void
 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                            struct lp_build_context * bld,
                            LLVMValueRef (*outputs)[4],
-                           LLVMValueRef emitted_vertices_vec)
+                           LLVMValueRef emitted_vertices_vec,
+                           LLVMValueRef stream_id)
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
 
     iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld,
                                             outputs,
-                                            emitted_vertices_vec);
+                                            emitted_vertices_vec,
+                                            stream_id);
 }
 
 static void
@@ -411,12 +414,12 @@ void
 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                            struct lp_build_context * bld,
                            LLVMValueRef (*outputs)[4],
-                           LLVMValueRef emitted_vertices_vec)
+                           LLVMValueRef emitted_vertices_vec,
+                           LLVMValueRef stream_id)
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
 
     IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
     const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
     const uint32_t attribSize = 4 * sizeof(float);
     const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
@@ -478,6 +481,49 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
        }
     }
 
+    /* When the output type is not points, the geometry shader may not
+     * output data to multiple streams. So early exit here.
+     */
+    if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
+        STACKRESTORE(pStack);
+        return;
+    }
+
+    // The stream id of each vertex is encoded in 2 bits
+    // (4 vertices per byte "box"):
+    // ----------------- ----------------- ----
+    // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
+    // ----------------- ----------------- ----
+
+    // Calculate where to put the stream id for the current vertex
+    // in the 1-byte "box".
+    Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));
+
+    // Calculate which box to put the stream id of the current vertex in.
+    Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));
+
+    // Skip count header
+    Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));
+
+    for (uint32_t lane = 0; lane < mVWidth; ++lane) {
+       Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
+       Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
+
+       Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));
+
+       // Mask to 2 bits so the stream id cannot exceed the maximum (valid ids: 0,1,2,3)
+       Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);
+
+       // Shift it to correct position in byte "box"
+       vVal = SHL(vVal, pShift);
+
+       // Info about other vertices may already be stored in this byte,
+       // so read it back and OR in the bits for the current vertex.
+       Value *storedValue = LOAD(pStreamOffset);
+       vVal = OR(storedValue, vVal);
+       STORE(vVal, pStreamOffset);
+    }
+
     STACKRESTORE(pStack);
 }
 
@@ -491,6 +537,15 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
 
+    /* When the output type is points, the geometry shader may output data
+     * to multiple streams, and end_primitive has no effect. Info about
+     * stream id for vertices is stored into the same place in memory where
+     * end primitive info is stored so early exit in this case.
+     */
+    if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
+        return;
+    }
+
     IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
     Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
@@ -569,9 +624,13 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
    pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
    pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
 
-   // XXX: single stream for now...
-   pGS->isSingleStream = true;
-   pGS->singleStreamID = 0;
+   // For point output topology, assume multiple streams are in use
+   if(pGS->outputTopology == TOP_POINT_LIST) {
+      pGS->isSingleStream = false;
+   } else {
+      pGS->isSingleStream = true;
+      pGS->singleStreamID = 0;
+   }
 
    pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
    pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;