llvmpipe: add POWER8 portability file - u_pwr8.h
[mesa.git] / src / gallium / auxiliary / util / u_pwr8.h
1 /*
2 * Copyright 2015 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Author: Oded Gabbay <oded.gabbay@redhat.com>
24 */
25
26 /**
27 * @file
28 * POWER8 intrinsics portability header.
29 *
30 */
31
#ifndef U_PWR8_H_
#define U_PWR8_H_

/* This entire header is only meaningful on little-endian POWER8: every
 * helper below assumes both the POWER8 ISA level and LE element order.
 */
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)

/* Force 16-byte alignment, matching the SSE __m128i alignment contract. */
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))

/* SSE-compatible 128-bit integer vector type, backed by an AltiVec
 * vector of 16 unsigned bytes.
 */
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;

/* Union used to reinterpret the 128-bit vector as differently typed
 * lanes (bytes / shorts / ints) without pointer-cast aliasing tricks.
 * NOTE(review): ubyte/ushort/uint are Mesa typedefs — assumed to be in
 * scope via the including translation unit.
 */
typedef VECTOR_ALIGN_16 union m128i {
   __m128i m128i;
   vector signed int m128si;
   vector unsigned int m128ui;
   ubyte ub[16];
   ushort us[8];
   int i[4];
   uint ui[4];
} __m128i_union;
50
51 static inline __m128i
52 vec_set_epi32 (int i3, int i2, int i1, int i0)
53 {
54 __m128i_union vdst;
55
56 #ifdef PIPE_ARCH_LITTLE_ENDIAN
57 vdst.i[0] = i0;
58 vdst.i[1] = i1;
59 vdst.i[2] = i2;
60 vdst.i[3] = i3;
61 #else
62 vdst.i[3] = i0;
63 vdst.i[2] = i1;
64 vdst.i[1] = i2;
65 vdst.i[0] = i3;
66 #endif
67
68 return (__m128i) vdst.m128si;
69 }
70
71 static inline __m128i
72 vec_setr_epi32 (int i0, int i1, int i2, int i3)
73 {
74 return vec_set_epi32 (i3, i2, i1, i0);
75 }
76
/**
 * Interleave the low-half 32-bit elements of 'even' and 'odd'
 * (equivalent of _mm_unpacklo_epi32): result = {even0, odd0, even1, odd1}.
 *
 * The permute mask selects bytes 0-15 from 'even' and 16-31 from 'odd';
 * the big-endian variant compensates for the reversed element order.
 */
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
#else
      {24, 25, 26, 27, 8, 9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}
89
/**
 * Interleave the high-half 32-bit elements of 'even' and 'odd'
 * (equivalent of _mm_unpackhi_epi32): result = {even2, odd2, even3, odd3}.
 *
 * The permute mask selects bytes 0-15 from 'even' and 16-31 from 'odd';
 * the big-endian variant compensates for the reversed element order.
 */
static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
      {16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7};
#endif

   return vec_perm (even, odd, perm_mask);
}
102
/**
 * Interleave the low 64-bit halves of 'even' and 'odd'
 * (equivalent of _mm_unpacklo_epi64): result = {even.lo64, odd.lo64}.
 *
 * The permute mask selects bytes 0-15 from 'even' and 16-31 from 'odd';
 * the big-endian variant compensates for the reversed element order.
 */
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
      {24, 25, 26, 27, 28, 29, 30, 31, 8, 9, 10, 11, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}
115
/**
 * Interleave the high 64-bit halves of 'even' and 'odd'
 * (equivalent of _mm_unpackhi_epi64): result = {even.hi64, odd.hi64}.
 *
 * The permute mask selects bytes 0-15 from 'even' and 16-31 from 'odd';
 * the big-endian variant compensates for the reversed element order.
 */
static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
      {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7};
#endif

   return vec_perm (even, odd, perm_mask);
}
128
129 static inline __m128i
130 vec_add_epi32 (__m128i a, __m128i b)
131 {
132 return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
133 }
134
135 static inline __m128i
136 vec_sub_epi32 (__m128i a, __m128i b)
137 {
138 return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
139 }
140
/* Call this function ONLY on POWER8 and newer platforms */
/**
 * Element-wise 32-bit multiply keeping the low word of each product
 * (equivalent of _mm_mullo_epi32).
 *
 * Implemented with inline asm: vmuluwm (Vector Multiply Unsigned Word
 * Modulo) is a POWER8 instruction, hence the ISA restriction above.
 */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
   __m128i v;

   __asm__(
       "vmuluwm %0, %1, %2 \n"
       : "=v" (v)
       : "v" (a), "v" (b)
   );

   return v;
}
155
156 static inline void
157 transpose4_epi32(const __m128i * restrict a,
158 const __m128i * restrict b,
159 const __m128i * restrict c,
160 const __m128i * restrict d,
161 __m128i * restrict o,
162 __m128i * restrict p,
163 __m128i * restrict q,
164 __m128i * restrict r)
165 {
166 __m128i t0 = vec_unpacklo_epi32(*a, *b);
167 __m128i t1 = vec_unpacklo_epi32(*c, *d);
168 __m128i t2 = vec_unpackhi_epi32(*a, *b);
169 __m128i t3 = vec_unpackhi_epi32(*c, *d);
170
171 *o = vec_unpacklo_epi64(t0, t1);
172 *p = vec_unpackhi_epi64(t0, t1);
173 *q = vec_unpacklo_epi64(t2, t3);
174 *r = vec_unpackhi_epi64(t2, t3);
175 }
176
177 static inline __m128i
178 vec_slli_epi32 (__m128i vsrc, unsigned int count)
179 {
180 __m128i_union vec_count;
181
182 if (count >= 32)
183 return (__m128i) vec_splats (0);
184 else if (count == 0)
185 return vsrc;
186
187 /* In VMX, all shift count fields must contain the same value */
188 vec_count.m128si = (vector signed int) vec_splats (count);
189 return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
190 }
191
192 static inline __m128i
193 vec_srli_epi32 (__m128i vsrc, unsigned int count)
194 {
195 __m128i_union vec_count;
196
197 if (count >= 32)
198 return (__m128i) vec_splats (0);
199 else if (count == 0)
200 return vsrc;
201
202 /* In VMX, all shift count fields must contain the same value */
203 vec_count.m128si = (vector signed int) vec_splats (count);
204 return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
205 }
206
207 static inline __m128i
208 vec_srai_epi32 (__m128i vsrc, unsigned int count)
209 {
210 __m128i_union vec_count;
211
212 if (count >= 32)
213 return (__m128i) vec_splats (0);
214 else if (count == 0)
215 return vsrc;
216
217 /* In VMX, all shift count fields must contain the same value */
218 vec_count.m128si = (vector signed int) vec_splats (count);
219 return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
220 }
221
222 static inline __m128i
223 vec_cmpeq_epi32 (__m128i a, __m128i b)
224 {
225 return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
226 }
227
/**
 * Unaligned 16-byte load (equivalent of _mm_loadu_si128).
 *
 * LE path: a plain vector dereference — assumes the hardware tolerates
 * unaligned vector loads (NOTE(review): presumably true on the POWER8
 * LE configurations this header targets — confirm).
 * BE path: the classic AltiVec unaligned-load idiom — load the two
 * aligned quadwords covering src (offsets 0 and 15) and merge them with
 * the vec_lvsl permute mask.
 */
static inline __m128i
vec_loadu_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

#ifdef PIPE_ARCH_LITTLE_ENDIAN

   vsrc.m128ui = *((vector unsigned int *) src);

#else

   __m128i vmask, tmp1, tmp2;

   vmask = vec_lvsl(0, src);

   tmp1 = (__m128i) vec_ld (0, src);
   tmp2 = (__m128i) vec_ld (15, src);
   vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);

#endif

   return vsrc.m128i;
}
251
252 static inline __m128i
253 vec_load_si128 (const uint32_t* src)
254 {
255 __m128i_union vsrc;
256
257 vsrc.m128ui = *((vector unsigned int *) src);
258
259 return vsrc.m128i;
260 }
261
262 static inline void
263 vec_store_si128 (uint32_t* dest, __m128i vdata)
264 {
265 vec_st ((vector unsigned int) vdata, 0, dest);
266 }
267
/* Call this function ONLY on POWER8 and newer platforms */
/**
 * Collect the most significant bit of each of the 16 bytes into a
 * 16-bit integer (equivalent of _mm_movemask_epi8).
 *
 * vec_vgbbd (vgbbd, POWER8 "gather bits by bytes by doubleword")
 * transposes the bit matrix within each doubleword, so the MSBs of the
 * 16 input bytes end up concentrated in one byte per doubleword; those
 * two bytes are then combined into the final mask. The byte indices
 * differ between the LE and BE views of the union.
 */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
   __m128i_union vtemp;
   int result;

   vtemp.m128i = vec_vgbbd(vsrc);

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
   result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

   return result;
}
285
286 static inline __m128i
287 vec_packs_epi16 (__m128i a, __m128i b)
288 {
289 #ifdef PIPE_ARCH_LITTLE_ENDIAN
290 return (__m128i) vec_packs ((vector signed short) a,
291 (vector signed short) b);
292 #else
293 return (__m128i) vec_packs ((vector signed short) b,
294 (vector signed short) a);
295 #endif
296 }
297
298 static inline __m128i
299 vec_packs_epi32 (__m128i a, __m128i b)
300 {
301 #ifdef PIPE_ARCH_LITTLE_ENDIAN
302 return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
303 #else
304 return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
305 #endif
306 }
307
308 #endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
309
310 #endif /* U_PWR8_H_ */