From 163d5fde06696fed2e69e000a7621087c1636749 Mon Sep 17 00:00:00 2001 From: Krzysztof Raszkowski Date: Tue, 29 Oct 2019 14:50:02 +0000 Subject: [PATCH] gallium/swr: Enable GL_ARB_gpu_shader5: multiple streams Added support for geometry shader multiple streams (part of GL_ARB_gpu_shader5 extension). Reviewed-by: Jan Zielinski --- src/gallium/auxiliary/draw/draw_llvm.c | 3 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 3 +- .../auxiliary/gallivm/lp_bld_tgsi_soa.c | 5 +- .../drivers/swr/rasterizer/core/frontend.cpp | 4 +- src/gallium/drivers/swr/swr_screen.cpp | 2 +- src/gallium/drivers/swr/swr_shader.cpp | 75 +++++++++++++++++-- 6 files changed, 78 insertions(+), 14 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 25b0f0f07b5..c15722cc3e7 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1521,7 +1521,8 @@ static void draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec) + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id) { const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base); struct draw_gs_llvm_variant *variant = gs_iface->variant; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 940a4c49a50..4bd0c0cf2af 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -446,7 +446,8 @@ struct lp_build_gs_iface void (*emit_vertex)(const struct lp_build_gs_iface *gs_iface, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec); + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id); void (*end_primitive)(const struct lp_build_gs_iface *gs_iface, struct lp_build_context * bld, LLVMValueRef total_emitted_vertices_vec, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 8066d59ee67..5a67f834c90 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -3978,6 +3978,8 @@ emit_vertex( LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; if (bld->gs_iface->emit_vertex) { + uint32_t imms_idx = emit_data->inst->Src[0].Register.SwizzleX; + LLVMValueRef stream_id = bld->immediates[0][imms_idx]; LLVMValueRef mask = mask_vec(bld_base); LLVMValueRef total_emitted_vertices_vec = LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); @@ -3986,7 +3988,8 @@ emit_vertex( gather_outputs(bld); bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base, bld->outputs, - total_emitted_vertices_vec); + total_emitted_vertices_vec, + stream_id); increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr, mask); increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr, diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 13e92e8640a..ab079ab4aa0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -702,8 +702,8 @@ void ProcessStreamIdBuffer(uint32_t stream, { SWR_ASSERT(stream < MAX_SO_STREAMS); - uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8; - uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U); + uint32_t numInputBytes = AlignUp(numEmittedVerts * 2, 8) / 8; + uint32_t numOutputBytes = AlignUp(numEmittedVerts, 8) / 8; for (uint32_t b = 0; b < numOutputBytes; ++b) { diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 030b62a15ae..6c596a463b4 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -191,7 +191,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: return 1024; case PIPE_CAP_MAX_VERTEX_STREAMS: - return 1; + return 4; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index c8e34b8adb8..e5e5411fb10 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -251,7 +251,8 @@ struct BuilderSWR : public Builder { swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec); + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id); void swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, @@ -306,13 +307,15 @@ static void swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec) + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id) { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld, outputs, - emitted_vertices_vec); + emitted_vertices_vec, + stream_id); } static void @@ -411,12 +414,12 @@ void BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, struct lp_build_context * bld, LLVMValueRef (*outputs)[4], - LLVMValueRef emitted_vertices_vec) + LLVMValueRef emitted_vertices_vec, + LLVMValueRef stream_id) { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); - const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE; const uint32_t attribSize = 4 * sizeof(float); const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS; @@ -478,6 +481,49 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base, } } + /* When the output type is not points, the geometry shader may not + * output data to multiple streams. So early exit here. + */ + if(iface->pGsState->outputTopology != TOP_POINT_LIST) { + STACKRESTORE(pStack); + return; + } + + // Info about stream id for each vertex + // is coded in 2 bits (4 vert per byte "box"): + // ----------------- ----------------- ---- + // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |... + // ----------------- ----------------- ---- + + // Calculate where need to put stream id for current vert + // in 1 byte "box". + Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2)); + + // Calculate in which box put stream id for current vert. + Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2)); + + // Skip count header + Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE)); + + for (uint32_t lane = 0; lane < mVWidth; ++lane) { + Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty); + Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane}); + + Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane))); + + // Just make sure that not overflow max - stream id = (0,1,2,3) + Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty); + + // Shift it to correct position in byte "box" + vVal = SHL(vVal, pShift); + + // Info about other vertices can be already stored + // so we need to read and add bits from current vert info. + Value *storedValue = LOAD(pStreamOffset); + vVal = OR(storedValue, vVal); + STORE(vVal, pStreamOffset); + } + STACKRESTORE(pStack); } @@ -491,6 +537,15 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base, { swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + /* When the output type is points, the geometry shader may output data + * to multiple streams, and end_primitive has no effect. Info about + * stream id for vertices is stored into the same place in memory where + * end primitive info is stored so early exit in this case. + */ + if (iface->pGsState->outputTopology == TOP_POINT_LIST) { + return; + } + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); @@ -569,9 +624,13 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; - // XXX: single stream for now... - pGS->isSingleStream = true; - pGS->singleStreamID = 0; + // If point primitive then assume to use multiple streams + if(pGS->outputTopology == TOP_POINT_LIST) { + pGS->isSingleStream = false; + } else { + pGS->isSingleStream = true; + pGS->singleStreamID = 0; + } pGS->vertexAttribOffset = VERTEX_POSITION_SLOT; pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset; -- 2.30.2