1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file format_conversion.h
25 * @brief API implementation
27 ******************************************************************************/
28 #include "format_types.h"
29 #include "format_traits.h"
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Load SIMD packed pixels in SOA format and converts to
33 /// SOA RGBA32_FLOAT format.
34 /// @param pSrc - source data in SOA form
35 /// @param dst - output data in SOA form
36 template<SWR_FORMAT SrcFormat
>
37 INLINE
void LoadSOA(const uint8_t *pSrc
, simdvector
&dst
)
39 // fast path for float32
40 if ((FormatTraits
<SrcFormat
>::GetType(0) == SWR_TYPE_FLOAT
) && (FormatTraits
<SrcFormat
>::GetBPC(0) == 32))
42 auto lambda
= [&](int comp
)
44 simdscalar vComp
= _simd_load_ps((const float*)(pSrc
+ comp
*sizeof(simdscalar
)));
46 dst
.v
[FormatTraits
<SrcFormat
>::swizzle(comp
)] = vComp
;
49 UnrollerL
<0, FormatTraits
<SrcFormat
>::numComps
, 1>::step(lambda
);
53 auto lambda
= [&](int comp
)
55 // load SIMD components
56 simdscalar vComp
= FormatTraits
<SrcFormat
>::loadSOA(comp
, pSrc
);
59 vComp
= FormatTraits
<SrcFormat
>::unpack(comp
, vComp
);
62 if (FormatTraits
<SrcFormat
>::isNormalized(comp
))
64 vComp
= _simd_cvtepi32_ps(_simd_castps_si(vComp
));
65 vComp
= _simd_mul_ps(vComp
, _simd_set1_ps(FormatTraits
<SrcFormat
>::toFloat(comp
)));
68 dst
.v
[FormatTraits
<SrcFormat
>::swizzle(comp
)] = vComp
;
70 pSrc
+= (FormatTraits
<SrcFormat
>::GetBPC(comp
) * KNOB_SIMD_WIDTH
) / 8;
73 UnrollerL
<0, FormatTraits
<SrcFormat
>::numComps
, 1>::step(lambda
);
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Clamps the given component based on the requirements on the
78 /// Format template arg
79 /// @param vComp - SIMD vector of floats
80 /// @param Component - component
81 template<SWR_FORMAT Format
>
82 INLINE simdscalar
Clamp(simdscalar
const &vC
, uint32_t Component
)
84 simdscalar vComp
= vC
;
85 if (FormatTraits
<Format
>::isNormalized(Component
))
87 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_UNORM
)
89 vComp
= _simd_max_ps(vComp
, _simd_setzero_ps());
92 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_SNORM
)
94 vComp
= _simd_max_ps(vComp
, _simd_set1_ps(-1.0f
));
96 vComp
= _simd_min_ps(vComp
, _simd_set1_ps(1.0f
));
98 else if (FormatTraits
<Format
>::GetBPC(Component
) < 32)
100 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_UINT
)
102 int iMax
= (1 << FormatTraits
<Format
>::GetBPC(Component
)) - 1;
104 simdscalari vCompi
= _simd_castps_si(vComp
);
105 vCompi
= _simd_max_epu32(vCompi
, _simd_set1_epi32(iMin
));
106 vCompi
= _simd_min_epu32(vCompi
, _simd_set1_epi32(iMax
));
107 vComp
= _simd_castsi_ps(vCompi
);
109 else if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_SINT
)
111 int iMax
= (1 << (FormatTraits
<Format
>::GetBPC(Component
) - 1)) - 1;
112 int iMin
= -1 - iMax
;
113 simdscalari vCompi
= _simd_castps_si(vComp
);
114 vCompi
= _simd_max_epi32(vCompi
, _simd_set1_epi32(iMin
));
115 vCompi
= _simd_min_epi32(vCompi
, _simd_set1_epi32(iMax
));
116 vComp
= _simd_castsi_ps(vCompi
);
123 //////////////////////////////////////////////////////////////////////////
124 /// @brief Normalize the given component based on the requirements on the
125 /// Format template arg
126 /// @param vComp - SIMD vector of floats
127 /// @param Component - component
128 template<SWR_FORMAT Format
>
129 INLINE simdscalar
Normalize(simdscalar
const &vC
, uint32_t Component
)
131 simdscalar vComp
= vC
;
132 if (FormatTraits
<Format
>::isNormalized(Component
))
134 vComp
= _simd_mul_ps(vComp
, _simd_set1_ps(FormatTraits
<Format
>::fromFloat(Component
)));
135 vComp
= _simd_castsi_ps(_simd_cvtps_epi32(vComp
));
140 //////////////////////////////////////////////////////////////////////////
141 /// @brief Convert and store simdvector of pixels in SOA
142 /// RGBA32_FLOAT to SOA format
143 /// @param src - source data in SOA form
144 /// @param dst - output data in SOA form
145 template<SWR_FORMAT DstFormat
>
146 INLINE
void StoreSOA(const simdvector
&src
, uint8_t *pDst
)
148 // fast path for float32
149 if ((FormatTraits
<DstFormat
>::GetType(0) == SWR_TYPE_FLOAT
) && (FormatTraits
<DstFormat
>::GetBPC(0) == 32))
151 for (uint32_t comp
= 0; comp
< FormatTraits
<DstFormat
>::numComps
; ++comp
)
153 simdscalar vComp
= src
.v
[FormatTraits
<DstFormat
>::swizzle(comp
)];
156 if (FormatTraits
<DstFormat
>::isSRGB
)
158 if (comp
< 3) // Input format is always RGBA32_FLOAT.
160 vComp
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(comp
, vComp
);
164 _simd_store_ps((float*)(pDst
+ comp
*sizeof(simdscalar
)), vComp
);
169 auto lambda
= [&](int comp
)
171 simdscalar vComp
= src
.v
[FormatTraits
<DstFormat
>::swizzle(comp
)];
174 if (FormatTraits
<DstFormat
>::isSRGB
)
176 if (comp
< 3) // Input format is always RGBA32_FLOAT.
178 vComp
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(comp
, vComp
);
183 vComp
= Clamp
<DstFormat
>(vComp
, comp
);
186 vComp
= Normalize
<DstFormat
>(vComp
, comp
);
189 vComp
= FormatTraits
<DstFormat
>::pack(comp
, vComp
);
192 FormatTraits
<DstFormat
>::storeSOA(comp
, pDst
, vComp
);
194 pDst
+= (FormatTraits
<DstFormat
>::GetBPC(comp
) * KNOB_SIMD_WIDTH
) / 8;
197 UnrollerL
<0, FormatTraits
<DstFormat
>::numComps
, 1>::step(lambda
);
200 #if ENABLE_AVX512_SIMD16
201 //////////////////////////////////////////////////////////////////////////
202 /// @brief Load SIMD packed pixels in SOA format and converts to
203 /// SOA RGBA32_FLOAT format.
204 /// @param pSrc - source data in SOA form
205 /// @param dst - output data in SOA form
206 template<SWR_FORMAT SrcFormat
>
207 INLINE
void SIMDCALL
LoadSOA(const uint8_t *pSrc
, simd16vector
&dst
)
209 // fast path for float32
210 if ((FormatTraits
<SrcFormat
>::GetType(0) == SWR_TYPE_FLOAT
) && (FormatTraits
<SrcFormat
>::GetBPC(0) == 32))
212 auto lambda
= [&](int comp
)
214 simd16scalar vComp
= _simd16_load_ps(reinterpret_cast<const float *>(pSrc
+ comp
* sizeof(simd16scalar
)));
216 dst
.v
[FormatTraits
<SrcFormat
>::swizzle(comp
)] = vComp
;
219 UnrollerL
<0, FormatTraits
<SrcFormat
>::numComps
, 1>::step(lambda
);
223 auto lambda
= [&](int comp
)
225 // load SIMD components
226 simd16scalar vComp
= FormatTraits
<SrcFormat
>::loadSOA_16(comp
, pSrc
);
229 vComp
= FormatTraits
<SrcFormat
>::unpack(comp
, vComp
);
232 if (FormatTraits
<SrcFormat
>::isNormalized(comp
))
234 vComp
= _simd16_cvtepi32_ps(_simd16_castps_si(vComp
));
235 vComp
= _simd16_mul_ps(vComp
, _simd16_set1_ps(FormatTraits
<SrcFormat
>::toFloat(comp
)));
238 dst
.v
[FormatTraits
<SrcFormat
>::swizzle(comp
)] = vComp
;
240 pSrc
+= (FormatTraits
<SrcFormat
>::GetBPC(comp
) * KNOB_SIMD16_WIDTH
) / 8;
243 UnrollerL
<0, FormatTraits
<SrcFormat
>::numComps
, 1>::step(lambda
);
246 //////////////////////////////////////////////////////////////////////////
247 /// @brief Clamps the given component based on the requirements on the
248 /// Format template arg
249 /// @param vComp - SIMD vector of floats
250 /// @param Component - component
251 template<SWR_FORMAT Format
>
252 INLINE simd16scalar SIMDCALL
Clamp(simd16scalar
const &v
, uint32_t Component
)
254 simd16scalar vComp
= v
;
255 if (FormatTraits
<Format
>::isNormalized(Component
))
257 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_UNORM
)
259 vComp
= _simd16_max_ps(vComp
, _simd16_setzero_ps());
262 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_SNORM
)
264 vComp
= _simd16_max_ps(vComp
, _simd16_set1_ps(-1.0f
));
266 vComp
= _simd16_min_ps(vComp
, _simd16_set1_ps(1.0f
));
268 else if (FormatTraits
<Format
>::GetBPC(Component
) < 32)
270 if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_UINT
)
272 int iMax
= (1 << FormatTraits
<Format
>::GetBPC(Component
)) - 1;
274 simd16scalari vCompi
= _simd16_castps_si(vComp
);
275 vCompi
= _simd16_max_epu32(vCompi
, _simd16_set1_epi32(iMin
));
276 vCompi
= _simd16_min_epu32(vCompi
, _simd16_set1_epi32(iMax
));
277 vComp
= _simd16_castsi_ps(vCompi
);
279 else if (FormatTraits
<Format
>::GetType(Component
) == SWR_TYPE_SINT
)
281 int iMax
= (1 << (FormatTraits
<Format
>::GetBPC(Component
) - 1)) - 1;
282 int iMin
= -1 - iMax
;
283 simd16scalari vCompi
= _simd16_castps_si(vComp
);
284 vCompi
= _simd16_max_epi32(vCompi
, _simd16_set1_epi32(iMin
));
285 vCompi
= _simd16_min_epi32(vCompi
, _simd16_set1_epi32(iMax
));
286 vComp
= _simd16_castsi_ps(vCompi
);
293 //////////////////////////////////////////////////////////////////////////
294 /// @brief Normalize the given component based on the requirements on the
295 /// Format template arg
296 /// @param vComp - SIMD vector of floats
297 /// @param Component - component
298 template<SWR_FORMAT Format
>
299 INLINE simd16scalar SIMDCALL
Normalize(simd16scalar
const &vComp
, uint32_t Component
)
301 simd16scalar r
= vComp
;
302 if (FormatTraits
<Format
>::isNormalized(Component
))
304 r
= _simd16_mul_ps(r
, _simd16_set1_ps(FormatTraits
<Format
>::fromFloat(Component
)));
305 r
= _simd16_castsi_ps(_simd16_cvtps_epi32(r
));
310 //////////////////////////////////////////////////////////////////////////
311 /// @brief Convert and store simdvector of pixels in SOA
312 /// RGBA32_FLOAT to SOA format
313 /// @param src - source data in SOA form
314 /// @param dst - output data in SOA form
315 template<SWR_FORMAT DstFormat
>
316 INLINE
void SIMDCALL
StoreSOA(const simd16vector
&src
, uint8_t *pDst
)
318 // fast path for float32
319 if ((FormatTraits
<DstFormat
>::GetType(0) == SWR_TYPE_FLOAT
) && (FormatTraits
<DstFormat
>::GetBPC(0) == 32))
321 for (uint32_t comp
= 0; comp
< FormatTraits
<DstFormat
>::numComps
; ++comp
)
323 simd16scalar vComp
= src
.v
[FormatTraits
<DstFormat
>::swizzle(comp
)];
326 if (FormatTraits
<DstFormat
>::isSRGB
)
328 if (comp
< 3) // Input format is always RGBA32_FLOAT.
330 vComp
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(comp
, vComp
);
334 _simd16_store_ps(reinterpret_cast<float *>(pDst
+ comp
* sizeof(simd16scalar
)), vComp
);
339 auto lambda
= [&](int comp
)
341 simd16scalar vComp
= src
.v
[FormatTraits
<DstFormat
>::swizzle(comp
)];
344 if (FormatTraits
<DstFormat
>::isSRGB
)
346 if (comp
< 3) // Input format is always RGBA32_FLOAT.
348 vComp
= FormatTraits
<R32G32B32A32_FLOAT
>::convertSrgb(comp
, vComp
);
353 vComp
= Clamp
<DstFormat
>(vComp
, comp
);
356 vComp
= Normalize
<DstFormat
>(vComp
, comp
);
359 vComp
= FormatTraits
<DstFormat
>::pack(comp
, vComp
);
362 FormatTraits
<DstFormat
>::storeSOA(comp
, pDst
, vComp
);
364 pDst
+= (FormatTraits
<DstFormat
>::GetBPC(comp
) * KNOB_SIMD16_WIDTH
) / 8;
367 UnrollerL
<0, FormatTraits
<DstFormat
>::numComps
, 1>::step(lambda
);