assert(lp_check_value(type, i));
/*
+ * Little endian:
* y = (uyvy >> (16*i + 8)) & 0xff
* u = (uyvy ) & 0xff
* v = (uyvy >> 16 ) & 0xff
+ *
+ * Big endian:
+ * y = (uyvy >> (-16*i + 16)) & 0xff
+ * u = (uyvy >> 24) & 0xff
+ * v = (uyvy >> 8) & 0xff
*/
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
- if (util_cpu_caps.has_sse2 && n == 4) {
+ if (util_cpu_caps.has_sse2 && n > 1) {
LLVMValueRef sel, tmp, tmp2;
struct lp_build_context bld32;
#endif
{
LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
+#endif
*y = LLVMBuildLShr(builder, packed, shift, "");
}
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*u = packed;
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+#endif
mask = lp_build_const_int_vec(gallivm, type, 0xff);
assert(lp_check_value(type, i));
/*
+ * Little endian:
* y = (yuyv >> 16*i) & 0xff
* u = (yuyv >> 8 ) & 0xff
* v = (yuyv >> 24 ) & 0xff
+ *
+ * Big endian:
+ * y = (yuyv >> (-16*i + 24) & 0xff
+ * u = (yuyv >> 16) & 0xff
+ * v = (yuyv) & 0xff
*/
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
- if (util_cpu_caps.has_sse2 && n == 4) {
+ if (util_cpu_caps.has_sse2 && n > 1) {
LLVMValueRef sel, tmp;
struct lp_build_context bld32;
#endif
{
LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
+#endif
*y = LLVMBuildLShr(builder, packed, shift, "");
}
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+ *v = packed;
+#endif
mask = lp_build_const_int_vec(gallivm, type, 0xff);
}
-static INLINE void
+static inline void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
* Make a 4 x unorm8 vector
*/
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
r = r;
g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
a = lp_build_const_int_vec(gallivm, type, 0xff000000);
+#else
+ r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
+ a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
+#endif
rgba = r;
rgba = LLVMBuildOr(builder, rgba, g, "");
assert(format_desc->block.width == 2);
assert(format_desc->block.height == 1);
- packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
+ packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
(void)j;