From 1eb72673fc239b5d0a95e393b847fdf8c8de6962 Mon Sep 17 00:00:00 2001 From: George Kyriazis Date: Thu, 22 Mar 2018 15:25:36 -0500 Subject: [PATCH] swr/rast: Lower VGATHERPS and VGATHERPS_16 to x86. Some more work to do before we can support simultaneous 8-wide and 16-wide and remove the VGATHERPS_16 version. Reviewed-by: Bruce Cherniak --- .../swr/rasterizer/jitter/builder_mem.cpp | 69 +------------------ 1 file changed, 2 insertions(+), 67 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index d8ec885308a..adb929600be 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -153,79 +153,14 @@ namespace SwrJit { AssertMemoryUsageParams(pBase, usage); - Value *vGather; - Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0)); - - // use avx2 gather instruction if available - if (JM()->mArch.AVX2()) - { - vGather = VGATHERPS(vSrc, pBasePtr, vIndices, vMask, C(scale)); - } - else - { - Value* pStack = STACKSAVE(); - - // store vSrc on the stack. this way we can select between a valid load address and the vSrc address - Value* vSrcPtr = ALLOCA(vSrc->getType()); - STORE(vSrc, vSrcPtr); - - vGather = VUNDEF_F(); - Value *vScaleVec = VIMMED1((uint32_t)scale); - Value *vOffsets = MUL(vIndices, vScaleVec); - for (uint32_t i = 0; i < mVWidth; ++i) - { - // single component byte index - Value *offset = VEXTRACT(vOffsets, C(i)); - // byte pointer to component - Value *loadAddress = GEP(pBasePtr, offset); - loadAddress = BITCAST(loadAddress, PointerType::get(mFP32Ty, 0)); - // pointer to the value to load if we're masking off a component - Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) }); - Value *selMask = VEXTRACT(vMask, C(i)); - // switch in a safe address to load if we're trying to access a vertex - Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress); - Value *val = LOAD(validAddress); - vGather = VINSERT(vGather, val, C(i)); - } - - STACKRESTORE(pStack); - } - - return vGather; + return VGATHERPS(vSrc, pBase, vIndices, vMask, C(scale)); } Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage) { AssertMemoryUsageParams(pBase, usage); - Value *vGather = VUNDEF_F_16(); - - // use AVX512F gather instruction if available - if (JM()->mArch.AVX512F()) - { - // force mask to , required by vgather2 - Value *mask = BITCAST(vMask, mInt16Ty); - - vGather = VGATHERPS_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale)); - } - else - { - Value *src0 = EXTRACT_16(vSrc, 0); - Value *src1 = EXTRACT_16(vSrc, 1); - - Value *indices0 = EXTRACT_16(vIndices, 0); - Value *indices1 = EXTRACT_16(vIndices, 1); - - Value *mask0 = EXTRACT_16(vMask, 0); - Value *mask1 = EXTRACT_16(vMask, 1); - - Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale); - Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale); - - vGather = JOIN_16(gather0, gather1); - } - - return vGather; + return VGATHERPS_16(vSrc, pBase, vIndices, vMask, C(scale)); } ////////////////////////////////////////////////////////////////////////// -- 2.30.2