/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay@redhat.com>
 */

/**
 * @file
 * POWER8 intrinsics portability header.
 */
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)

/* Force 16-byte alignment, the natural alignment of a VMX/VSX register. */
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))

/* SSE-style 128-bit vector type, emulated with an AltiVec byte vector. */
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;
/*
 * Union view of a 128-bit vector: lets the helpers below reinterpret the
 * same 16 bytes as signed/unsigned 32-bit lanes or as raw bytes without
 * pointer-cast aliasing tricks.
 */
typedef VECTOR_ALIGN_16 union m128i {
   __m128i m128i;                /* raw 16-byte vector */
   vector signed int m128si;     /* four signed 32-bit lanes */
   vector unsigned int m128ui;   /* four unsigned 32-bit lanes */
   unsigned char ub[16];         /* byte-wise scalar access (vec_movemask_epi8) */
   unsigned short us[8];         /* 16-bit scalar access */
   unsigned int ui[4];           /* 32-bit scalar access */
} __m128i_union;
/*
 * Build a vector from four 32-bit ints, mirroring SSE2 _mm_set_epi32:
 * i0 is the lowest lane, i3 the highest.
 */
static inline __m128i
vec_set_epi32 (int i3, int i2, int i1, int i0)
{
   __m128i_union vdst;

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   vdst.m128si = (vector signed int) { i0, i1, i2, i3 };
#else
   /* Big endian: element order in memory is reversed. */
   vdst.m128si = (vector signed int) { i3, i2, i1, i0 };
#endif

   return (__m128i) vdst.m128si;
}
/*
 * Build a vector from four 32-bit ints in "reversed" argument order
 * (i0 first), mirroring SSE2 _mm_setr_epi32.
 */
static inline __m128i
vec_setr_epi32 (int i0, int i1, int i2, int i3)
{
   return vec_set_epi32 (i3, i2, i1, i0);
}
/*
 * Interleave the low two 32-bit lanes of "even" and "odd",
 * mirroring SSE2 _mm_unpacklo_epi32.  Implemented as a byte permute;
 * the mask differs per endianness because lane order is reversed.
 */
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
       { 0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23};
#else
       {24, 25, 26, 27,  8,  9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}
/*
 * Interleave the high two 32-bit lanes of "even" and "odd",
 * mirroring SSE2 _mm_unpackhi_epi32.
 */
static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
       { 8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
       {16, 17, 18, 19,  0,  1,  2,  3, 20, 21, 22, 23,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}
/*
 * Interleave the low 64-bit halves of "even" and "odd",
 * mirroring SSE2 _mm_unpacklo_epi64.
 */
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
       { 0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
       {24, 25, 26, 27, 28, 29, 30, 31,  8,  9, 10, 11, 12, 13, 14, 15};
#endif

   return vec_perm (even, odd, perm_mask);
}
/*
 * Interleave the high 64-bit halves of "even" and "odd",
 * mirroring SSE2 _mm_unpackhi_epi64.
 */
static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
   static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
       { 8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
       {16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7};
#endif

   return vec_perm (even, odd, perm_mask);
}
/* Lane-wise 32-bit addition, mirroring SSE2 _mm_add_epi32. */
static inline __m128i
vec_add_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
}
/* Lane-wise 32-bit subtraction (a - b), mirroring SSE2 _mm_sub_epi32. */
static inline __m128i
vec_sub_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
}
/* Call this function ONLY on POWER8 and newer platforms */
/*
 * Lane-wise 32-bit multiply keeping the low 32 bits of each product,
 * mirroring SSE4.1 _mm_mullo_epi32.  Uses the POWER8 vmuluwm instruction
 * directly since older compilers have no builtin for it.
 */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
   __m128i v;

   __asm__(
           "vmuluwm %0, %1, %2   \n"
           : "=v" (v)
           : "v" (a), "v" (b)
           );

   return v;
}
/*
 * Bitwise (~a) & b, mirroring SSE2 _mm_andnot_si128 operand order:
 * vec_andc computes first-arg AND NOT second-arg, hence (b, a).
 */
static inline __m128i
vec_andnot_si128 (__m128i a, __m128i b)
{
   return vec_andc (b, a);
}
/*
 * Transpose a 4x4 matrix of 32-bit lanes.  Input rows *a..*d, output rows
 * *o..*r; inputs and outputs must not alias (restrict).
 */
static inline void
transpose4_epi32(const __m128i * restrict a,
                 const __m128i * restrict b,
                 const __m128i * restrict c,
                 const __m128i * restrict d,
                 __m128i * restrict o,
                 __m128i * restrict p,
                 __m128i * restrict q,
                 __m128i * restrict r)
{
   /* First pass: interleave 32-bit lanes pairwise. */
   __m128i t0 = vec_unpacklo_epi32(*a, *b);
   __m128i t1 = vec_unpacklo_epi32(*c, *d);
   __m128i t2 = vec_unpackhi_epi32(*a, *b);
   __m128i t3 = vec_unpackhi_epi32(*c, *d);

   /* Second pass: interleave 64-bit halves to finish the transpose. */
   *o = vec_unpacklo_epi64(t0, t1);
   *p = vec_unpackhi_epi64(t0, t1);
   *q = vec_unpacklo_epi64(t2, t3);
   *r = vec_unpackhi_epi64(t2, t3);
}
/*
 * Lane-wise logical left shift by an immediate count, mirroring SSE2
 * _mm_slli_epi32: counts >= 32 yield zero.
 */
static inline __m128i
vec_slli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);

   return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
}
/*
 * Lane-wise logical right shift by an immediate count, mirroring SSE2
 * _mm_srli_epi32: counts >= 32 yield zero.
 */
static inline __m128i
vec_srli_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);

   return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
}
/*
 * Lane-wise arithmetic right shift by an immediate count, mirroring SSE2
 * _mm_srai_epi32.
 *
 * NOTE(review): for count >= 32 this returns 0, whereas SSE2 psrad fills
 * each lane with its sign bit — confirm callers never pass count >= 32.
 */
static inline __m128i
vec_srai_epi32 (__m128i vsrc, unsigned int count)
{
   __m128i_union vec_count;

   if (count >= 32)
      return (__m128i) vec_splats (0);

   /* In VMX, all shift count fields must contain the same value */
   vec_count.m128si = (vector signed int) vec_splats (count);

   return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
}
/*
 * Lane-wise 32-bit equality compare, mirroring SSE2 _mm_cmpeq_epi32:
 * equal lanes become all-ones, unequal lanes all-zeros.
 */
static inline __m128i
vec_cmpeq_epi32 (__m128i a, __m128i b)
{
   return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
}
/*
 * Unaligned 16-byte load, mirroring SSE2 _mm_loadu_si128.
 */
static inline __m128i
vec_loadu_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

#ifdef PIPE_ARCH_LITTLE_ENDIAN

   /* POWER8 LE can do unaligned vector loads directly. */
   vsrc.m128ui = *((vector unsigned int *) src);

#else

   /* Classic VMX sequence: two aligned loads combined by a permute. */
   __m128i vmask, tmp1, tmp2;

   vmask = vec_lvsl(0, src);

   tmp1 = (__m128i) vec_ld (0, src);
   tmp2 = (__m128i) vec_ld (15, src);
   vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);

#endif

   return vsrc.m128i;
}
/*
 * Aligned 16-byte load, mirroring SSE2 _mm_load_si128.
 * src must be 16-byte aligned.
 */
static inline __m128i
vec_load_si128 (const uint32_t* src)
{
   __m128i_union vsrc;

   vsrc.m128ui = *((vector unsigned int *) src);

   return vsrc.m128i;
}
/*
 * Aligned 16-byte store, mirroring SSE2 _mm_store_si128.
 * dest must be 16-byte aligned (vec_st requires it).
 */
static inline void
vec_store_si128 (uint32_t* dest, __m128i vdata)
{
   vec_st ((vector unsigned int) vdata, 0, dest);
}
/* Call this function ONLY on POWER8 and newer platforms */
/*
 * Gather the most significant bit of each of the 16 bytes into a 16-bit
 * mask, mirroring SSE2 _mm_movemask_epi8.  vec_vgbbd (vector gather bits
 * by doubleword) packs the MSBs; the two relevant bytes are then combined,
 * with positions depending on endianness.
 */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
   __m128i_union vtemp;
   int result;

   vtemp.m128i = vec_vgbbd(vsrc);

#ifdef PIPE_ARCH_LITTLE_ENDIAN
   result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
   result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

   return result;
}
/*
 * Pack 16-bit lanes of a and b into 8-bit lanes with signed saturation,
 * mirroring SSE2 _mm_packs_epi16.  Operand order is swapped on big
 * endian so a's lanes land in the low half of the result.
 */
static inline __m128i
vec_packs_epi16 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed short) a,
                               (vector signed short) b);
#else
   return (__m128i) vec_packs ((vector signed short) b,
                               (vector signed short) a);
#endif
}
/*
 * Pack 32-bit lanes of a and b into 16-bit lanes with signed saturation,
 * mirroring SSE2 _mm_packs_epi32.  Operand order is swapped on big
 * endian so a's lanes land in the low half of the result.
 */
static inline __m128i
vec_packs_epi32 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
   return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
#else
   return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif
}
314 #endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
316 #endif /* U_PWR8_H_ */