//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template <ScaleFactor ScaleT>
+template <ScaleFactor ScaleT = ScaleFactor::SF_1>
static SIMDINLINE Float SIMDCALL
i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
};
}
+template <ScaleFactor ScaleT = ScaleFactor::SF_1>
+static SIMDINLINE Float SIMDCALL
+ sw_i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+{
+ return Float{
+ SIMD256T::template sw_i32gather_ps<ScaleT>(p, idx.v8[0]),
+ SIMD256T::template sw_i32gather_ps<ScaleT>(p, idx.v8[1]),
+ };
+}
+
static SIMDINLINE Float SIMDCALL
load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
{
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT>
+template <ScaleFactor ScaleT = ScaleFactor::SF_1>
static SIMDINLINE Float SIMDCALL
mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
{
};
}
+template <ScaleFactor ScaleT = ScaleFactor::SF_1>
+static SIMDINLINE Float SIMDCALL
+ sw_mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
+{
+ return Float{
+ SIMD256T::template sw_mask_i32gather_ps<ScaleT>(old.v8[0], p, idx.v8[0], mask.v8[0]),
+ SIMD256T::template sw_mask_i32gather_ps<ScaleT>(old.v8[1], p, idx.v8[1], mask.v8[1]),
+ };
+}
+
static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
{
SIMD256T::maskstore_ps(p, mask.v8[0], src.v8[0]);