/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay@redhat.com>
 */
/**
 * @file
 * POWER8 intrinsics portability header.
 */

#ifndef U_PWR8_H_
#define U_PWR8_H_
35 #if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
37 #define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))
39 typedef VECTOR_ALIGN_16 vector
unsigned char __m128i
;
41 typedef VECTOR_ALIGN_16
union m128i
{
43 vector
signed int m128si
;
44 vector
unsigned int m128ui
;
52 vec_set_epi32 (int i3
, int i2
, int i1
, int i0
)
56 #ifdef PIPE_ARCH_LITTLE_ENDIAN
68 return (__m128i
) vdst
.m128si
;
72 vec_setr_epi32 (int i0
, int i1
, int i2
, int i3
)
74 return vec_set_epi32 (i3
, i2
, i1
, i0
);
78 vec_unpacklo_epi32 (__m128i even
, __m128i odd
)
80 static const __m128i perm_mask
=
81 #ifdef PIPE_ARCH_LITTLE_ENDIAN
82 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
84 {24, 25, 26, 27, 8, 9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
87 return vec_perm (even
, odd
, perm_mask
);
91 vec_unpackhi_epi32 (__m128i even
, __m128i odd
)
93 static const __m128i perm_mask
=
94 #ifdef PIPE_ARCH_LITTLE_ENDIAN
95 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
97 {16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7};
100 return vec_perm (even
, odd
, perm_mask
);
103 static inline __m128i
104 vec_unpacklo_epi64 (__m128i even
, __m128i odd
)
106 static const __m128i perm_mask
=
107 #ifdef PIPE_ARCH_LITTLE_ENDIAN
108 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
110 {24, 25, 26, 27, 28, 29, 30, 31, 8, 9, 10, 11, 12, 13, 14, 15};
113 return vec_perm (even
, odd
, perm_mask
);
116 static inline __m128i
117 vec_unpackhi_epi64 (__m128i even
, __m128i odd
)
119 static const __m128i perm_mask
=
120 #ifdef PIPE_ARCH_LITTLE_ENDIAN
121 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
123 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7};
126 return vec_perm (even
, odd
, perm_mask
);
129 static inline __m128i
130 vec_add_epi32 (__m128i a
, __m128i b
)
132 return (__m128i
) vec_add ((vector
signed int) a
, (vector
signed int) b
);
135 static inline __m128i
136 vec_sub_epi32 (__m128i a
, __m128i b
)
138 return (__m128i
) vec_sub ((vector
signed int) a
, (vector
signed int) b
);
141 /* Call this function ONLY on POWER8 and newer platforms */
142 static inline __m128i
143 vec_mullo_epi32 (__m128i a
, __m128i b
)
148 "vmuluwm %0, %1, %2 \n"
157 transpose4_epi32(const __m128i
* restrict a
,
158 const __m128i
* restrict b
,
159 const __m128i
* restrict c
,
160 const __m128i
* restrict d
,
161 __m128i
* restrict o
,
162 __m128i
* restrict p
,
163 __m128i
* restrict q
,
164 __m128i
* restrict r
)
166 __m128i t0
= vec_unpacklo_epi32(*a
, *b
);
167 __m128i t1
= vec_unpacklo_epi32(*c
, *d
);
168 __m128i t2
= vec_unpackhi_epi32(*a
, *b
);
169 __m128i t3
= vec_unpackhi_epi32(*c
, *d
);
171 *o
= vec_unpacklo_epi64(t0
, t1
);
172 *p
= vec_unpackhi_epi64(t0
, t1
);
173 *q
= vec_unpacklo_epi64(t2
, t3
);
174 *r
= vec_unpackhi_epi64(t2
, t3
);
177 static inline __m128i
178 vec_slli_epi32 (__m128i vsrc
, unsigned int count
)
180 __m128i_union vec_count
;
183 return (__m128i
) vec_splats (0);
187 /* In VMX, all shift count fields must contain the same value */
188 vec_count
.m128si
= (vector
signed int) vec_splats (count
);
189 return (__m128i
) vec_sl ((vector
signed int) vsrc
, vec_count
.m128ui
);
192 static inline __m128i
193 vec_srli_epi32 (__m128i vsrc
, unsigned int count
)
195 __m128i_union vec_count
;
198 return (__m128i
) vec_splats (0);
202 /* In VMX, all shift count fields must contain the same value */
203 vec_count
.m128si
= (vector
signed int) vec_splats (count
);
204 return (__m128i
) vec_sr ((vector
signed int) vsrc
, vec_count
.m128ui
);
207 static inline __m128i
208 vec_srai_epi32 (__m128i vsrc
, unsigned int count
)
210 __m128i_union vec_count
;
213 return (__m128i
) vec_splats (0);
217 /* In VMX, all shift count fields must contain the same value */
218 vec_count
.m128si
= (vector
signed int) vec_splats (count
);
219 return (__m128i
) vec_sra ((vector
signed int) vsrc
, vec_count
.m128ui
);
222 static inline __m128i
223 vec_cmpeq_epi32 (__m128i a
, __m128i b
)
225 return (__m128i
) vec_cmpeq ((vector
signed int) a
, (vector
signed int) b
);
228 static inline __m128i
229 vec_loadu_si128 (const uint32_t* src
)
233 #ifdef PIPE_ARCH_LITTLE_ENDIAN
235 vsrc
.m128ui
= *((vector
unsigned int *) src
);
239 __m128i vmask
, tmp1
, tmp2
;
241 vmask
= vec_lvsl(0, src
);
243 tmp1
= (__m128i
) vec_ld (0, src
);
244 tmp2
= (__m128i
) vec_ld (15, src
);
245 vsrc
.m128ui
= (vector
unsigned int) vec_perm (tmp1
, tmp2
, vmask
);
252 static inline __m128i
253 vec_load_si128 (const uint32_t* src
)
257 vsrc
.m128ui
= *((vector
unsigned int *) src
);
263 vec_store_si128 (uint32_t* dest
, __m128i vdata
)
265 vec_st ((vector
unsigned int) vdata
, 0, dest
);
268 /* Call this function ONLY on POWER8 and newer platforms */
270 vec_movemask_epi8 (__m128i vsrc
)
275 vtemp
.m128i
= vec_vgbbd(vsrc
);
277 #ifdef PIPE_ARCH_LITTLE_ENDIAN
278 result
= vtemp
.ub
[15] << 8 | vtemp
.ub
[7];
280 result
= vtemp
.ub
[0] << 8 | vtemp
.ub
[8];
286 static inline __m128i
287 vec_packs_epi16 (__m128i a
, __m128i b
)
289 #ifdef PIPE_ARCH_LITTLE_ENDIAN
290 return (__m128i
) vec_packs ((vector
signed short) a
,
291 (vector
signed short) b
);
293 return (__m128i
) vec_packs ((vector
signed short) b
,
294 (vector
signed short) a
);
298 static inline __m128i
299 vec_packs_epi32 (__m128i a
, __m128i b
)
301 #ifdef PIPE_ARCH_LITTLE_ENDIAN
302 return (__m128i
) vec_packs ((vector
signed int) a
, (vector
signed int) b
);
304 return (__m128i
) vec_packs ((vector
signed int) b
, (vector
signed int) a
);
308 #endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
310 #endif /* U_PWR8_H_ */