swr: [rasterizer core] Finish SIMD16 PA OPT except tessellation
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / format_conversion.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file format_conversion.h
24 *
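25 * @brief Helpers that convert SIMD pixel data between SOA surface formats and SOA RGBA32_FLOAT (LoadSOA / StoreSOA, plus Clamp / Normalize).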
26 *
27 ******************************************************************************/
28 #include "format_types.h"
29 #include "format_traits.h"
30
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Load SIMD packed pixels in SOA format and converts to
33 /// SOA RGBA32_FLOAT format.
34 /// @param pSrc - source data in SOA form
35 /// @param dst - output data in SOA form
36 template<SWR_FORMAT SrcFormat>
37 INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
38 {
39 // fast path for float32
40 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
41 {
42 auto lambda = [&](int comp)
43 {
44 simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar)));
45
46 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
47 };
48
49 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
50 return;
51 }
52
53 auto lambda = [&](int comp)
54 {
55 // load SIMD components
56 simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc);
57
58 // unpack
59 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
60
61 // convert
62 if (FormatTraits<SrcFormat>::isNormalized(comp))
63 {
64 vComp = _simd_cvtepi32_ps(_simd_castps_si(vComp));
65 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
66 }
67
68 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
69
70 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
71 };
72
73 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
74 }
75
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Clamps the given component based on the requirements on the
78 /// Format template arg
79 /// @param vComp - SIMD vector of floats
80 /// @param Component - component
81 template<SWR_FORMAT Format>
82 INLINE simdscalar Clamp(simdscalar vComp, uint32_t Component)
83 {
84 if (FormatTraits<Format>::isNormalized(Component))
85 {
86 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
87 {
88 vComp = _simd_max_ps(vComp, _simd_setzero_ps());
89 }
90
91 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
92 {
93 vComp = _simd_max_ps(vComp, _simd_set1_ps(-1.0f));
94 }
95 vComp = _simd_min_ps(vComp, _simd_set1_ps(1.0f));
96 }
97 else if (FormatTraits<Format>::GetBPC(Component) < 32)
98 {
99 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
100 {
101 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
102 int iMin = 0;
103 simdscalari vCompi = _simd_castps_si(vComp);
104 vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
105 vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
106 vComp = _simd_castsi_ps(vCompi);
107 }
108 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
109 {
110 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
111 int iMin = -1 - iMax;
112 simdscalari vCompi = _simd_castps_si(vComp);
113 vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
114 vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
115 vComp = _simd_castsi_ps(vCompi);
116 }
117 }
118
119 return vComp;
120 }
121
122 //////////////////////////////////////////////////////////////////////////
123 /// @brief Normalize the given component based on the requirements on the
124 /// Format template arg
125 /// @param vComp - SIMD vector of floats
126 /// @param Component - component
127 template<SWR_FORMAT Format>
128 INLINE simdscalar Normalize(simdscalar vComp, uint32_t Component)
129 {
130 if (FormatTraits<Format>::isNormalized(Component))
131 {
132 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component)));
133 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
134 }
135 return vComp;
136 }
137
138 //////////////////////////////////////////////////////////////////////////
139 /// @brief Convert and store simdvector of pixels in SOA
140 /// RGBA32_FLOAT to SOA format
141 /// @param src - source data in SOA form
142 /// @param dst - output data in SOA form
143 template<SWR_FORMAT DstFormat>
144 INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
145 {
146 // fast path for float32
147 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
148 {
149 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
150 {
151 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
152
153 // Gamma-correct
154 if (FormatTraits<DstFormat>::isSRGB)
155 {
156 if (comp < 3) // Input format is always RGBA32_FLOAT.
157 {
158 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
159 }
160 }
161
162 _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp);
163 }
164 return;
165 }
166
167 auto lambda = [&](int comp)
168 {
169 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
170
171 // Gamma-correct
172 if (FormatTraits<DstFormat>::isSRGB)
173 {
174 if (comp < 3) // Input format is always RGBA32_FLOAT.
175 {
176 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
177 }
178 }
179
180 // clamp
181 vComp = Clamp<DstFormat>(vComp, comp);
182
183 // normalize
184 vComp = Normalize<DstFormat>(vComp, comp);
185
186 // pack
187 vComp = FormatTraits<DstFormat>::pack(comp, vComp);
188
189 // store
190 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
191
192 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
193 };
194
195 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
196 }
197
198 #if ENABLE_AVX512_SIMD16
199 //////////////////////////////////////////////////////////////////////////
200 /// @brief Load SIMD packed pixels in SOA format and converts to
201 /// SOA RGBA32_FLOAT format.
202 /// @param pSrc - source data in SOA form
203 /// @param dst - output data in SOA form
204 template<SWR_FORMAT SrcFormat>
205 INLINE void LoadSOA(const uint8_t *pSrc, simd16vector &dst)
206 {
207 // fast path for float32
208 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
209 {
210 auto lambda = [&](int comp)
211 {
212 simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar)));
213
214 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
215 };
216
217 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
218 return;
219 }
220
221 auto lambda = [&](int comp)
222 {
223 // load SIMD components
224 simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc);
225
226 // unpack
227 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
228
229 // convert
230 if (FormatTraits<SrcFormat>::isNormalized(comp))
231 {
232 vComp = _simd16_cvtepi32_ps(_simd16_castps_si(vComp));
233 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
234 }
235
236 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
237
238 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8;
239 };
240
241 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
242 }
243
244 //////////////////////////////////////////////////////////////////////////
245 /// @brief Clamps the given component based on the requirements on the
246 /// Format template arg
247 /// @param vComp - SIMD vector of floats
248 /// @param Component - component
249 template<SWR_FORMAT Format>
250 INLINE simd16scalar Clamp(simd16scalar vComp, uint32_t Component)
251 {
252 if (FormatTraits<Format>::isNormalized(Component))
253 {
254 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
255 {
256 vComp = _simd16_max_ps(vComp, _simd16_setzero_ps());
257 }
258
259 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
260 {
261 vComp = _simd16_max_ps(vComp, _simd16_set1_ps(-1.0f));
262 }
263 vComp = _simd16_min_ps(vComp, _simd16_set1_ps(1.0f));
264 }
265 else if (FormatTraits<Format>::GetBPC(Component) < 32)
266 {
267 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
268 {
269 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
270 int iMin = 0;
271 simd16scalari vCompi = _simd16_castps_si(vComp);
272 vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
273 vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
274 vComp = _simd16_castsi_ps(vCompi);
275 }
276 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
277 {
278 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
279 int iMin = -1 - iMax;
280 simd16scalari vCompi = _simd16_castps_si(vComp);
281 vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
282 vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
283 vComp = _simd16_castsi_ps(vCompi);
284 }
285 }
286
287 return vComp;
288 }
289
290 //////////////////////////////////////////////////////////////////////////
291 /// @brief Normalize the given component based on the requirements on the
292 /// Format template arg
293 /// @param vComp - SIMD vector of floats
294 /// @param Component - component
295 template<SWR_FORMAT Format>
296 INLINE simd16scalar Normalize(simd16scalar vComp, uint32_t Component)
297 {
298 if (FormatTraits<Format>::isNormalized(Component))
299 {
300 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component)));
301 vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
302 }
303 return vComp;
304 }
305
306 //////////////////////////////////////////////////////////////////////////
307 /// @brief Convert and store simdvector of pixels in SOA
308 /// RGBA32_FLOAT to SOA format
309 /// @param src - source data in SOA form
310 /// @param dst - output data in SOA form
311 template<SWR_FORMAT DstFormat>
312 INLINE void StoreSOA(const simd16vector &src, uint8_t *pDst)
313 {
314 // fast path for float32
315 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
316 {
317 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
318 {
319 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
320
321 // Gamma-correct
322 if (FormatTraits<DstFormat>::isSRGB)
323 {
324 if (comp < 3) // Input format is always RGBA32_FLOAT.
325 {
326 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
327 }
328 }
329
330 _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp);
331 }
332 return;
333 }
334
335 auto lambda = [&](int comp)
336 {
337 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
338
339 // Gamma-correct
340 if (FormatTraits<DstFormat>::isSRGB)
341 {
342 if (comp < 3) // Input format is always RGBA32_FLOAT.
343 {
344 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
345 }
346 }
347
348 // clamp
349 vComp = Clamp<DstFormat>(vComp, comp);
350
351 // normalize
352 vComp = Normalize<DstFormat>(vComp, comp);
353
354 // pack
355 vComp = FormatTraits<DstFormat>::pack(comp, vComp);
356
357 // store
358 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
359
360 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8;
361 };
362
363 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
364 }
365
366 #endif