From: Tim Rowley Date: Mon, 13 Nov 2017 21:11:21 +0000 (-0600) Subject: swr/rast: Use gather instruction for i32gather_ps on simd16/avx512 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=439904847e9c2970494c18e8c47bd6c38c0ed8ab;p=mesa.git swr/rast: Use gather instruction for i32gather_ps on simd16/avx512 Speed up avx512 platforms; fixes performance regression caused by switch to simdlib. Reviewed-by: Bruce Cherniak Cc: mesa-stable@lists.freedesktop.org --- diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 95e4c319099..c13b9f616aa 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -484,17 +484,7 @@ SIMD_WRAPPER_2(unpacklo_ps); template static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT)) { - uint32_t *pOffsets = (uint32_t*)&idx; - Float vResult; - float* pResult = (float*)&vResult; - for (uint32_t i = 0; i < SIMD_WIDTH; ++i) - { - uint32_t offset = pOffsets[i]; - offset = offset * static_cast(ScaleT); - pResult[i] = *(float const*)(((uint8_t const*)p + offset)); - } - - return vResult; + return _mm512_i32gather_ps(idx, p, static_cast(ScaleT)); } static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)