src/gallium/drivers/swr/rasterizer/common/intrin.h

   1 /****************************************************************************
   2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 ****************************************************************************/
  23
  24 #ifndef __SWR_INTRIN_H__
  25 #define __SWR_INTRIN_H__
  26
  27 #include "os.h"
  28
  29 #define SIMD_ARCH KNOB_ARCH
  30 #include "simdlib_types.hpp"
  31
  32 typedef SIMDImpl::SIMD128Impl::Float                      simd4scalar;
  33 typedef SIMDImpl::SIMD128Impl::Double                     simd4scalard;
  34 typedef SIMDImpl::SIMD128Impl::Integer                    simd4scalari;
  35 typedef SIMDImpl::SIMD128Impl::Vec4                       simd4vector;
  36 typedef SIMDImpl::SIMD128Impl::Mask                       simd4mask;
  37
  38 typedef SIMDImpl::SIMD256Impl::Float                      simd8scalar;
  39 typedef SIMDImpl::SIMD256Impl::Double                     simd8scalard;
  40 typedef SIMDImpl::SIMD256Impl::Integer                    simd8scalari;
  41 typedef SIMDImpl::SIMD256Impl::Vec4                       simd8vector;
  42 typedef SIMDImpl::SIMD256Impl::Mask                       simd8mask;
  43
  44 typedef SIMDImpl::SIMD512Impl::Float                      simd16scalar;
  45 typedef SIMDImpl::SIMD512Impl::Double                     simd16scalard;
  46 typedef SIMDImpl::SIMD512Impl::Integer                    simd16scalari;
  47 typedef SIMDImpl::SIMD512Impl::Vec4                       simd16vector;
  48 typedef SIMDImpl::SIMD512Impl::Mask                       simd16mask;
  49
  50 #if KNOB_SIMD_WIDTH == 8
  51 typedef simd8scalar     simdscalar;
  52 typedef simd8scalard    simdscalard;
  53 typedef simd8scalari    simdscalari;
  54 typedef simd8vector     simdvector;
  55 typedef simd8mask       simdmask;
  56 #else
  57 #error Unsupported vector width
  58 #endif
  59
  60 INLINE
  61 UINT pdep_u32(UINT a, UINT mask)
  62 {
  63 #if KNOB_ARCH >= KNOB_ARCH_AVX2
  64     return _pdep_u32(a, mask);
  65 #else
  66     UINT result = 0;
  67
  68     // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
  69     // using bsf instead of funky loop
  70     DWORD maskIndex;
  71     while (_BitScanForward(&maskIndex, mask))
  72     {
  73         // 1. isolate lowest set bit of mask
  74         const UINT lowest = 1 << maskIndex;
  75
  76         // 2. populate LSB from src
  77         const UINT LSB = (UINT)((int)(a << 31) >> 31);
  78
  79         // 3. copy bit from mask
  80         result |= LSB & lowest;
  81
  82         // 4. clear lowest bit
  83         mask &= ~lowest;
  84
  85         // 5. prepare for next iteration
  86         a >>= 1;
  87     }
  88
  89     return result;
  90 #endif
  91 }
  92
  93 INLINE
  94 UINT pext_u32(UINT a, UINT mask)
  95 {
  96 #if KNOB_ARCH >= KNOB_ARCH_AVX2
  97     return _pext_u32(a, mask);
  98 #else
  99     UINT result = 0;
 100     DWORD maskIndex;
 101     uint32_t currentBit = 0;
 102     while (_BitScanForward(&maskIndex, mask))
 103     {
 104         // 1. isolate lowest set bit of mask
 105         const UINT lowest = 1 << maskIndex;
 106
 107         // 2. copy bit from mask
 108         result |= ((a & lowest) > 0) << currentBit++;
 109
 110         // 3. clear lowest bit
 111         mask &= ~lowest;
 112     }
 113     return result;
 114 #endif
 115 }
 116
 117 #endif//__SWR_INTRIN_H__