+// Gathers element 0 of each of the four component registers into one
+// 128-bit vector laid out as {x[0], y[0], z[0], w[0]}.
+INLINE __m128 swizzleLane0(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // Low unpack within each 128-bit half: {x0 z0 x1 z1 | x4 z4 x5 z5} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpacklo_ps(x, z);
+    const simdscalar yw = _mm256_unpacklo_ps(y, w);
+
+    // Second interleave yields {x0 y0 z0 w0 | x4 y4 z4 w4}; keep the low half.
+    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 0);
+}
+
+// Gathers element 1 of each of the four component registers into one
+// 128-bit vector laid out as {x[1], y[1], z[1], w[1]}.
+INLINE __m128 swizzleLane1(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // Low unpack within each 128-bit half: {x0 z0 x1 z1 | x4 z4 x5 z5} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpacklo_ps(x, z);
+    const simdscalar yw = _mm256_unpacklo_ps(y, w);
+
+    // High interleave yields {x1 y1 z1 w1 | x5 y5 z5 w5}; keep the low half.
+    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 0);
+}
+
+// Gathers element 2 of each of the four component registers into one
+// 128-bit vector laid out as {x[2], y[2], z[2], w[2]}.
+INLINE __m128 swizzleLane2(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // High unpack within each 128-bit half: {x2 z2 x3 z3 | x6 z6 x7 z7} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpackhi_ps(x, z);
+    const simdscalar yw = _mm256_unpackhi_ps(y, w);
+
+    // Low interleave yields {x2 y2 z2 w2 | x6 y6 z6 w6}; keep the low half.
+    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 0);
+}
+
+// Gathers element 3 of each of the four component registers into one
+// 128-bit vector laid out as {x[3], y[3], z[3], w[3]}.
+INLINE __m128 swizzleLane3(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // High unpack within each 128-bit half: {x2 z2 x3 z3 | x6 z6 x7 z7} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpackhi_ps(x, z);
+    const simdscalar yw = _mm256_unpackhi_ps(y, w);
+
+    // High interleave yields {x3 y3 z3 w3 | x7 y7 z7 w7}; keep the low half.
+    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 0);
+}
+
+// Gathers element 4 of each of the four component registers into one
+// 128-bit vector laid out as {x[4], y[4], z[4], w[4]}.
+INLINE __m128 swizzleLane4(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // Low unpack within each 128-bit half: {x0 z0 x1 z1 | x4 z4 x5 z5} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpacklo_ps(x, z);
+    const simdscalar yw = _mm256_unpacklo_ps(y, w);
+
+    // Low interleave yields {x0 y0 z0 w0 | x4 y4 z4 w4}; keep the high half.
+    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 1);
+}
+
+// Gathers element 5 of each of the four component registers into one
+// 128-bit vector laid out as {x[5], y[5], z[5], w[5]}.
+INLINE __m128 swizzleLane5(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // Low unpack within each 128-bit half: {x0 z0 x1 z1 | x4 z4 x5 z5} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpacklo_ps(x, z);
+    const simdscalar yw = _mm256_unpacklo_ps(y, w);
+
+    // High interleave yields {x1 y1 z1 w1 | x5 y5 z5 w5}; keep the high half.
+    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 1);
+}
+
+// Gathers element 6 of each of the four component registers into one
+// 128-bit vector laid out as {x[6], y[6], z[6], w[6]}.
+INLINE __m128 swizzleLane6(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // High unpack within each 128-bit half: {x2 z2 x3 z3 | x6 z6 x7 z7} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpackhi_ps(x, z);
+    const simdscalar yw = _mm256_unpackhi_ps(y, w);
+
+    // Low interleave yields {x2 y2 z2 w2 | x6 y6 z6 w6}; keep the high half.
+    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 1);
+}
+
+// Gathers element 7 of each of the four component registers into one
+// 128-bit vector laid out as {x[7], y[7], z[7], w[7]}.
+INLINE __m128 swizzleLane7(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+{
+    // High unpack within each 128-bit half: {x2 z2 x3 z3 | x6 z6 x7 z7} and the y/w equivalent.
+    const simdscalar xz = _mm256_unpackhi_ps(x, z);
+    const simdscalar yw = _mm256_unpackhi_ps(y, w);
+
+    // High interleave yields {x3 y3 z3 w3 | x7 y7 z7 w7}; keep the high half.
+    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
+    return _mm256_extractf128_ps(xyzw, 1);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane0.
+INLINE __m128 swizzleLane0(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane0(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane1.
+INLINE __m128 swizzleLane1(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane1(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane2.
+INLINE __m128 swizzleLane2(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane2(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane3.
+INLINE __m128 swizzleLane3(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane3(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane4.
+INLINE __m128 swizzleLane4(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane4(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane5.
+INLINE __m128 swizzleLane5(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane5(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane6.
+INLINE __m128 swizzleLane6(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane6(compX, compY, compZ, compW);
+}
+
+// simdvector overload: forwards the vector's x/y/z/w members to the
+// four-register swizzleLane7.
+INLINE __m128 swizzleLane7(const simdvector &v)
+{
+    const simdscalar &compX = v.x;
+    const simdscalar &compY = v.y;
+    const simdscalar &compZ = v.z;
+    const simdscalar &compW = v.w;
+    return swizzleLane7(compX, compY, compZ, compW);
+}
+
+// Runtime-indexed dispatch to swizzleLane0..swizzleLane7 for a simdvector.
+// Any lane value outside 0..7 produces an all-zero vector.
+INLINE __m128 swizzleLaneN(const simdvector &v, int lane)
+{
+    // Start from the out-of-range result; a matching case overwrites it.
+    __m128 gathered = _mm_setzero_ps();
+
+    switch (lane)
+    {
+    case 0: gathered = swizzleLane0(v); break;
+    case 1: gathered = swizzleLane1(v); break;
+    case 2: gathered = swizzleLane2(v); break;
+    case 3: gathered = swizzleLane3(v); break;
+    case 4: gathered = swizzleLane4(v); break;
+    case 5: gathered = swizzleLane5(v); break;
+    case 6: gathered = swizzleLane6(v); break;
+    case 7: gathered = swizzleLane7(v); break;
+    default: break;
+    }
+
+    return gathered;
+}
+
+#if ENABLE_AVX512_SIMD16
+// simd16vector overload for lane 0: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane0.
+INLINE __m128 swizzleLane0(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane0(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 1: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane1.
+INLINE __m128 swizzleLane1(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane1(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 2: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane2.
+INLINE __m128 swizzleLane2(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane2(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 3: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane3.
+INLINE __m128 swizzleLane3(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane3(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 4: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane4.
+INLINE __m128 swizzleLane4(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane4(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 5: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane5.
+INLINE __m128 swizzleLane5(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane5(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 6: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane6.
+INLINE __m128 swizzleLane6(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane6(halfX, halfY, halfZ, halfW);
+}
+
+// simd16vector overload for lane 7: extracts part 0 of each component
+// (presumably the lower 8 lanes -- confirm against _simd16_extract_ps)
+// and forwards to the four-register swizzleLane7.
+INLINE __m128 swizzleLane7(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 0);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 0);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 0);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 0);
+    return swizzleLane7(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 8 of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane0 on that part.
+INLINE __m128 swizzleLane8(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane0(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 9 of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane1 on that part.
+INLINE __m128 swizzleLane9(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane1(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 10 (0xA) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane2 on that part.
+INLINE __m128 swizzleLaneA(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane2(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 11 (0xB) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane3 on that part.
+INLINE __m128 swizzleLaneB(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane3(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 12 (0xC) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane4 on that part.
+INLINE __m128 swizzleLaneC(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane4(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 13 (0xD) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane5 on that part.
+INLINE __m128 swizzleLaneD(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane5(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 14 (0xE) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane6 on that part.
+INLINE __m128 swizzleLaneE(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane6(halfX, halfY, halfZ, halfW);
+}
+
+// Lane 15 (0xF) of a simd16vector: extracts part 1 of each component
+// (presumably the upper 8 lanes -- confirm against _simd16_extract_ps)
+// and reuses the four-register swizzleLane7 on that part.
+INLINE __m128 swizzleLaneF(const simd16vector &v)
+{
+    const simdscalar halfX = _simd16_extract_ps(v.x, 1);
+    const simdscalar halfY = _simd16_extract_ps(v.y, 1);
+    const simdscalar halfZ = _simd16_extract_ps(v.z, 1);
+    const simdscalar halfW = _simd16_extract_ps(v.w, 1);
+    return swizzleLane7(halfX, halfY, halfZ, halfW);
+}
+
+// Runtime-indexed dispatch to swizzleLane0..swizzleLaneF for a simd16vector.
+// Any lane value outside 0..15 produces an all-zero vector.
+INLINE __m128 swizzleLaneN(const simd16vector &v, int lane)
+{
+    // Start from the out-of-range result; a matching case overwrites it.
+    __m128 gathered = _mm_setzero_ps();
+
+    switch (lane)
+    {
+    case 0:  gathered = swizzleLane0(v); break;
+    case 1:  gathered = swizzleLane1(v); break;
+    case 2:  gathered = swizzleLane2(v); break;
+    case 3:  gathered = swizzleLane3(v); break;
+    case 4:  gathered = swizzleLane4(v); break;
+    case 5:  gathered = swizzleLane5(v); break;
+    case 6:  gathered = swizzleLane6(v); break;
+    case 7:  gathered = swizzleLane7(v); break;
+    case 8:  gathered = swizzleLane8(v); break;
+    case 9:  gathered = swizzleLane9(v); break;
+    case 10: gathered = swizzleLaneA(v); break;
+    case 11: gathered = swizzleLaneB(v); break;
+    case 12: gathered = swizzleLaneC(v); break;
+    case 13: gathered = swizzleLaneD(v); break;
+    case 14: gathered = swizzleLaneE(v); break;
+    case 15: gathered = swizzleLaneF(v); break;
+    default: break;
+    }
+
+    return gathered;
+}
+
+#endif