swr/rast: Clang-Format most rasterizer source code
[mesa.git] / src / gallium / drivers / swr / rasterizer / common / simdlib_128_avx512_core.inl
1 /****************************************************************************
2 * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
// Direct-inclusion guard: this fragment is only meant to be pulled in through
// "simdlib.hpp" (see the #error text below). Compilation is aborted unless
// __SIMD_LIB_AVX512_HPP__ has already been defined by the including code.
23 #if !defined(__SIMD_LIB_AVX512_HPP__)
24 #error Do not include this file directly, use "simdlib.hpp" instead.
25 #endif
26
27 //============================================================================
28 // SIMD128 AVX (512) implementation
29 //
30 // Since this implementation inherits from the AVX (2) implementation,
31 // the only operations below are the ones that replace AVX (2) operations.
32 // These use native AVX512 instructions with masking to enable a larger
33 // register set.
34 //============================================================================
35
// ---------------------------------------------------------------------------
// Float wrapper generators.
//
// Each macro expands to a static function named `op` that passes its Float
// argument(s) through __conv, calls the zero-masking AVX512 intrinsic
// _mm512_maskz_<intrin> with `mask` as the first argument, and passes the
// result through __conv again before returning it as a Float.
//
// The forms without a trailing underscore reuse `op` as the intrinsic suffix
// and supply __mmask16(0xf), i.e. a mask with only the low four bits set.
// NOTE(review): presumably one bit per 32-bit lane of the 128-bit value;
// __conv is defined outside this file -- confirm its exact conversions there.
// ---------------------------------------------------------------------------

// op(a): one Float operand.
36 #define SIMD_WRAPPER_1_(op, intrin, mask) \
37 static SIMDINLINE Float SIMDCALL op(Float a) \
38 { \
39 return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
40 }
41 #define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
42
// op<ImmT>(a): one Float operand; the template argument ImmT is forwarded as
// the last (immediate) argument of the intrinsic.
43 #define SIMD_WRAPPER_1I_(op, intrin, mask) \
44 template <int ImmT> \
45 static SIMDINLINE Float SIMDCALL op(Float a) \
46 { \
47 return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
48 }
49 #define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
50
// op(a, b): two Float operands.
51 #define SIMD_WRAPPER_2_(op, intrin, mask) \
52 static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
53 { \
54 return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
55 }
56 #define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
57
// op<ImmT>(a, b): two Float operands plus an immediate. Unlike its siblings
// this macro has no underscore form: the intrinsic suffix is always `op` and
// the mask is the bare literal 0xf rather than a typed __mmask16(0xf).
58 #define SIMD_WRAPPER_2I(op) \
59 template <int ImmT> \
60 static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
61 { \
62 return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
63 }
64
// op(a, b, c): three Float operands.
65 #define SIMD_WRAPPER_3_(op, intrin, mask) \
66 static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
67 { \
68 return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
69 }
70 #define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
71
// ---------------------------------------------------------------------------
// Double wrapper generators.
//
// Each macro expands to a static function named `op` that passes its Double
// argument(s) through __conv, calls the zero-masking AVX512 intrinsic
// _mm512_maskz_<intrin> with `mask` as the first argument, and passes the
// result through __conv again before returning it as a Double.
//
// The forms without a trailing underscore reuse `op` as the intrinsic suffix
// and supply __mmask8(0x3), i.e. a mask with only the low two bits set.
// NOTE(review): presumably one bit per 64-bit lane of the 128-bit value;
// __conv is defined outside this file -- confirm its exact conversions there.
// ---------------------------------------------------------------------------

// op(a): one Double operand.
72 #define SIMD_DWRAPPER_1_(op, intrin, mask) \
73 static SIMDINLINE Double SIMDCALL op(Double a) \
74 { \
75 return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
76 }
77 #define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
78
// op<ImmT>(a): one Double operand; the template argument ImmT is forwarded as
// the last (immediate) argument of the intrinsic.
79 #define SIMD_DWRAPPER_1I_(op, intrin, mask) \
80 template <int ImmT> \
81 static SIMDINLINE Double SIMDCALL op(Double a) \
82 { \
83 return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
84 }
85 #define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
86
// op(a, b): two Double operands.
87 #define SIMD_DWRAPPER_2_(op, intrin, mask) \
88 static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
89 { \
90 return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
91 }
92 #define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
93
// op<ImmT>(a, b): two Double operands plus an immediate. This macro has no
// underscore form: the intrinsic suffix is always `op` and the mask is the
// bare literal 0x3 rather than a typed __mmask8(0x3).
94 #define SIMD_DWRAPPER_2I(op) \
95 template <int ImmT> \
96 static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
97 { \
98 return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
99 }
100
// ---------------------------------------------------------------------------
// Integer wrapper generators.
//
// Each macro expands to a static function named `op` that passes its Integer
// argument(s) through __conv, calls the zero-masking AVX512 intrinsic
// _mm512_maskz_<intrin> with `mask` as the first argument, and passes the
// result through __conv again before returning it as an Integer.
//
// The suffixed short forms pick the mask for the element width of the op:
//   _8  -> __mmask64(0xffffull)  (low 16 bits set)
//   _16 -> __mmask32(0xff)       (low  8 bits set)
//   _64 -> __mmask8(0x3)         (low  2 bits set)
// No _32 form is defined in this file.
// NOTE(review): presumably one mask bit per element of the 128-bit value;
// __conv is defined outside this file -- confirm its exact conversions there.
// ---------------------------------------------------------------------------

// op(a): one Integer operand.
101 #define SIMD_IWRAPPER_1_(op, intrin, mask) \
102 static SIMDINLINE Integer SIMDCALL op(Integer a) \
103 { \
104 return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
105 }
106 #define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
107 #define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
108 #define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
109
// op<ImmT>(a): one Integer operand; the template argument ImmT is forwarded
// as the last (immediate) argument of the intrinsic.
110 #define SIMD_IWRAPPER_1I_(op, intrin, mask) \
111 template <int ImmT> \
112 static SIMDINLINE Integer SIMDCALL op(Integer a) \
113 { \
114 return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
115 }
116 #define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
117 #define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
118 #define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
119
// op(a, b): two Integer operands.
120 #define SIMD_IWRAPPER_2_(op, intrin, mask) \
121 static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
122 { \
123 return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
124 }
125 #define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
126 #define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
127 #define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
128
// op<ImmT>(a, b): two Integer operands plus an immediate. The mask is the
// bare literal 0xf (low four bits set) regardless of the op's element width.
// NOTE(review): four mask bits would only cover a full 128-bit value for
// 32-bit elements -- confirm the element width before instantiating this
// macro with any other op. It is not instantiated in this file.
129 #define SIMD_IWRAPPER_2I(op) \
130 template <int ImmT> \
131 static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
132 { \
133 return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
134 }
135
// Two-operand integer operations generated with the SIMD_IWRAPPER_2_* macros.
// The _8 / _16 / _64 suffix of the wrapper selects the mask that goes with
// the element width the operation works on.
136 SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
137 SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
138 SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
139 SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
140 SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and
141 // _mm512_packs_epi16
142 SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and
143 // _mm512_packs_epi32
144 SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and
145 // _mm512_packus_epi16
146 SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
147 // _mm512_packus_epi32
148 SIMD_IWRAPPER_2_16(unpackhi_epi16); // int16 interleave; see documentation for _mm512_unpackhi_epi16
149 SIMD_IWRAPPER_2_64(unpackhi_epi64); // int64 interleave; see documentation for _mm512_unpackhi_epi64
150 SIMD_IWRAPPER_2_8(unpackhi_epi8); // int8 interleave; see documentation for _mm512_unpackhi_epi8
151 SIMD_IWRAPPER_2_16(unpacklo_epi16); // int16 interleave; see documentation for _mm512_unpacklo_epi16
152 SIMD_IWRAPPER_2_64(unpacklo_epi64); // int64 interleave; see documentation for _mm512_unpacklo_epi64
153 SIMD_IWRAPPER_2_8(unpacklo_epi8); // int8 interleave; see documentation for _mm512_unpacklo_epi8
154
155 static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
156 {
157 __mmask64 m = 0xffffull;
158 return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
159 }
160
// Drop every wrapper-generator macro so that none of them remains defined
// after this fragment has been included. The list also names a few macros
// that are not defined in this file (the *_32 variants); #undef of a name
// that is not currently a macro is a no-op, so those entries are harmless.
161 #undef SIMD_WRAPPER_1_
162 #undef SIMD_WRAPPER_1
163 #undef SIMD_WRAPPER_1I_
164 #undef SIMD_WRAPPER_1I
165 #undef SIMD_WRAPPER_2_
166 #undef SIMD_WRAPPER_2
167 #undef SIMD_WRAPPER_2I
168 #undef SIMD_WRAPPER_3_
169 #undef SIMD_WRAPPER_3
170 #undef SIMD_DWRAPPER_1_
171 #undef SIMD_DWRAPPER_1
172 #undef SIMD_DWRAPPER_1I_
173 #undef SIMD_DWRAPPER_1I
174 #undef SIMD_DWRAPPER_2_
175 #undef SIMD_DWRAPPER_2
176 #undef SIMD_DWRAPPER_2I
177 #undef SIMD_IWRAPPER_1_
178 #undef SIMD_IWRAPPER_1_8
179 #undef SIMD_IWRAPPER_1_16
180 #undef SIMD_IWRAPPER_1_32
181 #undef SIMD_IWRAPPER_1_64
182 #undef SIMD_IWRAPPER_1I_
183 #undef SIMD_IWRAPPER_1I_8
184 #undef SIMD_IWRAPPER_1I_16
185 #undef SIMD_IWRAPPER_1I_32
186 #undef SIMD_IWRAPPER_1I_64
187 #undef SIMD_IWRAPPER_2_
188 #undef SIMD_IWRAPPER_2_8
189 #undef SIMD_IWRAPPER_2_16
190 #undef SIMD_IWRAPPER_2_32
191 #undef SIMD_IWRAPPER_2_64
192 #undef SIMD_IWRAPPER_2I
// The SIMD_IWRAPPER_2I_* names below are not defined in this file; their
// #undef lines are left commented out.
193 //#undef SIMD_IWRAPPER_2I_8
194 //#undef SIMD_IWRAPPER_2I_16
195 //#undef SIMD_IWRAPPER_2I_32
196 //#undef SIMD_IWRAPPER_2I_64