From e12db47a7ddcee6f26409b5b1dd722747560597d Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Fri, 2 Feb 2018 17:03:01 -0600 Subject: [PATCH] swr/rast: Use llvm intrinsic masked gather Use llvm intrinsic masked.gather instead of manual unroll for the cases where we have vector of pointers. Improves llvm IR debug experience by reducing a ton of IR to a single intrinsic call. Also seems to reduce overall stack use considerably. Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/jitter/builder_mem.cpp | 12 ++++++++++++ .../drivers/swr/rasterizer/jitter/builder_mem.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index 6e462d522f7..86fdfca392f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -346,6 +346,18 @@ namespace SwrJit return vGather; } + ////////////////////////////////////////////////////////////////////////// + /// @brief Alternative masked gather where the source is a vector of pointers; looks up the llvm masked_gather intrinsic in the current module and returns the result of a single CALL to it + /// @param pVecSrcPtr - SIMD wide vector of pointers (first operand of the intrinsic call; the second operand is the constant C(0)) + /// @param pVecMask - SIMD active lanes (third operand of the intrinsic call) + /// @param pVecPassthru - SIMD wide vector of values to load when lane is inactive (fourth operand); its type is the overload type used to fetch the intrinsic declaration + Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru) + { + Function* pMaskedGather = llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::masked_gather, { pVecPassthru->getType() }); + + return CALL(pMaskedGather, { pVecSrcPtr, C(0), pVecMask, pVecPassthru }); + } + void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, Value* mask, Value* vGatherComponents[], bool bPackedOutput) { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index c2279a62d98..f31cb4abae0 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -58,6 +58,8 @@ virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byte Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); +Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru); /* masked gather where the source is a SIMD wide vector of pointers; pVecMask selects the active lanes and pVecPassthru supplies the values for inactive lanes */ + void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask); void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput); -- 2.30.2