From: Tim Rowley Date: Thu, 14 Dec 2017 19:39:29 +0000 (-0600) Subject: swr/rast: EXTRACT2 changed from vextract/vinsert to vshuffle X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=11a9d4f9b53722a491d9f23e848a02b741febd44;p=mesa.git swr/rast: EXTRACT2 changed from vextract/vinsert to vshuffle Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index bdcafd28a39..0774889af10 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -653,16 +653,14 @@ namespace SwrJit } else { - Value *src0 = EXTRACT2_F(vSrc, 0); - Value *src1 = EXTRACT2_F(vSrc, 1); + Value *src0 = EXTRACT2(vSrc, 0); + Value *src1 = EXTRACT2(vSrc, 1); - Value *indices0 = EXTRACT2_I(vIndices, 0); - Value *indices1 = EXTRACT2_I(vIndices, 1); + Value *indices0 = EXTRACT2(vIndices, 0); + Value *indices1 = EXTRACT2(vIndices, 1); - Value *vmask16 = VMASK2(vMask); - - Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. - Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); + Value *mask0 = EXTRACT2(vMask, 0); + Value *mask1 = EXTRACT2(vMask, 1); Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale); @@ -738,16 +736,14 @@ namespace SwrJit } else { - Value *src0 = EXTRACT2_F(vSrc, 0); - Value *src1 = EXTRACT2_F(vSrc, 1); - - Value *indices0 = EXTRACT2_I(vIndices, 0); - Value *indices1 = EXTRACT2_I(vIndices, 1); + Value *src0 = EXTRACT2(vSrc, 0); + Value *src1 = EXTRACT2(vSrc, 1); - Value *vmask16 = VMASK2(vMask); + Value *indices0 = EXTRACT2(vIndices, 0); + Value *indices1 = EXTRACT2(vIndices, 1); - Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better.. - Value *mask1 = MASK(EXTRACT2_I(vmask16, 1)); + Value *mask0 = EXTRACT2(vMask, 0); + Value *mask1 = EXTRACT2(vMask, 1); Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale); Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale); @@ -809,34 +805,12 @@ namespace SwrJit } #if USE_SIMD16_BUILDER - ////////////////////////////////////////////////////////////////////////// - /// @brief - Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm) - { - const uint32_t i0 = (imm > 0) ? mVWidth : 0; - - Value *result = VUNDEF_F(); - - for (uint32_t i = 0; i < mVWidth; i += 1) - { -#if 1 - if (!a2->getType()->getScalarType()->isFloatTy()) - { - a2 = BITCAST(a2, mSimd2FP32Ty); - } - -#endif - Value *temp = VEXTRACT(a2, C(i0 + i)); - - result = VINSERT(result, temp, C(i)); - } - - return result; - } - - Value *Builder::EXTRACT2_I(Value *a2, uint32_t imm) + Value *Builder::EXTRACT2(Value *x, uint32_t imm) { - return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty); + if (imm == 0) + return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); + else + return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15}); } Value *Builder::JOIN2(Value *a, Value *b) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 98bc5633512..646ed0efb2e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -117,8 +117,7 @@ Value *VMASK2(Value *mask); ////////////////////////////////////////////////////////////////////////// #if USE_SIMD16_BUILDER -Value *EXTRACT2_F(Value *a2, uint32_t imm); -Value *EXTRACT2_I(Value *a2, uint32_t imm); +Value *EXTRACT2(Value *x, uint32_t imm); Value *JOIN2(Value *a, Value *b); #endif diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 8d97ddfdc98..aa911b58f3f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1078,14 +1078,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, vOffsets16 = ADD(vOffsets16, vInstanceStride16); // TODO: remove the following simd8 interop stuff once all code paths are fully widened to SIMD16.. - Value *vmask16 = VMASK2(vGatherMask16); - Value *vGatherMask = MASK(EXTRACT2_I(vmask16, 0)); - Value *vGatherMask2 = MASK(EXTRACT2_I(vmask16, 1)); - - Value *vOffsets = EXTRACT2_I(vOffsets16, 0); - Value *vOffsets2 = EXTRACT2_I(vOffsets16, 1); + Value *vGatherMask = EXTRACT2(vGatherMask16, 0); + Value *vGatherMask2 = EXTRACT2(vGatherMask16, 1); + Value *vOffsets = EXTRACT2(vOffsets16, 0); + Value *vOffsets2 = EXTRACT2(vOffsets16, 1); #else // override cur indices with 0 if pitch is 0 Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0)); @@ -2322,8 +2320,8 @@ void FetchJit::Shuffle8bpcGatherd2(Shuffle8bpcArgs &args) // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now.. - Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0); - Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1); + Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0); + Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1); Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy); Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy); @@ -2482,8 +2480,8 @@ void FetchJit::Shuffle8bpcGatherd2(Shuffle8bpcArgs &args) break; } - Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0); - Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1); + Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0); + Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1); Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy); Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy); @@ -2787,8 +2785,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args) { // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now.. - Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[0], 0); - Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[0], 1); + Value *vGatherResult_lo = EXTRACT2(vGatherResult[0], 0); + Value *vGatherResult_hi = EXTRACT2(vGatherResult[0], 1); Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy); Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy); @@ -2814,8 +2812,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args) Value *vi128ZW_hi = nullptr; if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) { - Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[1], 0); - Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[1], 1); + Value *vGatherResult_lo = EXTRACT2(vGatherResult[1], 0); + Value *vGatherResult_hi = EXTRACT2(vGatherResult[1], 1); Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy); Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy); @@ -2962,8 +2960,8 @@ void FetchJit::Shuffle16bpcGather2(Shuffle16bpcArgs &args) // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now.. - Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[selectedGather], 0); - Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[selectedGather], 1); + Value *vGatherResult_lo = EXTRACT2(vGatherResult[selectedGather], 0); + Value *vGatherResult_hi = EXTRACT2(vGatherResult[selectedGather], 1); Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]), vGatherTy); Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]), vGatherTy);