33d37e3cecea7da635a2841d0156b464f2e59da2
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ****************************************************************************/
24 #ifndef __SWR_INTRIN_H__
25 #define __SWR_INTRIN_H__
29 #define SIMD_ARCH KNOB_ARCH
30 #include "simdlib_types.hpp"
32 typedef SIMDImpl::SIMD128Impl::Float simd4scalar
;
33 typedef SIMDImpl::SIMD128Impl::Double simd4scalard
;
34 typedef SIMDImpl::SIMD128Impl::Integer simd4scalari
;
35 typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector
;
36 typedef SIMDImpl::SIMD128Impl::Mask simd4mask
;
38 typedef SIMDImpl::SIMD256Impl::Float simd8scalar
;
39 typedef SIMDImpl::SIMD256Impl::Double simd8scalard
;
40 typedef SIMDImpl::SIMD256Impl::Integer simd8scalari
;
41 typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector
;
42 typedef SIMDImpl::SIMD256Impl::Mask simd8mask
;
44 typedef SIMDImpl::SIMD512Impl::Float simd16scalar
;
45 typedef SIMDImpl::SIMD512Impl::Double simd16scalard
;
46 typedef SIMDImpl::SIMD512Impl::Integer simd16scalari
;
47 typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector
;
48 typedef SIMDImpl::SIMD512Impl::Mask simd16mask
;
50 #if KNOB_SIMD_WIDTH == 8
51 typedef simd8scalar simdscalar
;
52 typedef simd8scalard simdscalard
;
53 typedef simd8scalari simdscalari
;
54 typedef simd8vector simdvector
;
55 typedef simd8mask simdmask
;
57 #error Unsupported vector width
61 UINT
pdep_u32(UINT a
, UINT mask
)
63 #if KNOB_ARCH >= KNOB_ARCH_AVX2
64 return _pdep_u32(a
, mask
);
68 // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
69 // using bsf instead of funky loop
71 while (_BitScanForward(&maskIndex
, mask
))
73 // 1. isolate lowest set bit of mask
74 const UINT lowest
= 1 << maskIndex
;
76 // 2. populate LSB from src
77 const UINT LSB
= (UINT
)((int)(a
<< 31) >> 31);
79 // 3. copy bit from mask
80 result
|= LSB
& lowest
;
82 // 4. clear lowest bit
85 // 5. prepare for next iteration
94 UINT
pext_u32(UINT a
, UINT mask
)
96 #if KNOB_ARCH >= KNOB_ARCH_AVX2
97 return _pext_u32(a
, mask
);
101 uint32_t currentBit
= 0;
102 while (_BitScanForward(&maskIndex
, mask
))
104 // 1. isolate lowest set bit of mask
105 const UINT lowest
= 1 << maskIndex
;
107 // 2. copy bit from mask
108 result
|= ((a
& lowest
) > 0) << currentBit
++;
110 // 3. clear lowest bit
117 #endif//__SWR_INTRIN_H__