#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_arit.h"
-#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
-
+#include "lp_bld_init.h"
+#include "lp_bld_logic.h"
/**
* Extract Y, U, V channels from packed UYVY.
* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
*/
static void
-uyvy_to_yuv_soa(LLVMBuilderRef builder,
+uyvy_to_yuv_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i,
LLVMValueRef *u,
LLVMValueRef *v)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
- LLVMValueRef shift, mask;
+ LLVMValueRef mask;
memset(&type, 0, sizeof type);
type.width = 32;
assert(lp_check_value(type, i));
/*
- * y = (uyvy >> 16*i) & 0xff
+ * Little endian:
+ * y = (uyvy >> (16*i + 8)) & 0xff
* u = (uyvy ) & 0xff
* v = (uyvy >> 16 ) & 0xff
+ *
+ * Big endian:
+ * y = (uyvy >> (-16*i + 16)) & 0xff
+ * u = (uyvy >> 24) & 0xff
+ * v = (uyvy >> 8) & 0xff
*/
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
- shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
- *y = LLVMBuildLShr(builder, packed, shift, "");
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shifting with a per-element count.
+ * x86 has no support for it, so it gets translated to roughly
+ * 5 instructions per element. Performance wasn't measured, but this
+ * cuts shader size by quite a bit (less of a difference if the cpu
+ * has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n > 1) {
+ LLVMValueRef sel, tmp, tmp2;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, gallivm, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+ tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
+ sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
+ *y = lp_build_select(&bld32, sel, tmp, tmp2);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
+#endif
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
*u = packed;
- *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+#endif
- mask = lp_build_const_int_vec(type, 0xff);
+ mask = lp_build_const_int_vec(gallivm, type, 0xff);
*y = LLVMBuildAnd(builder, *y, mask, "y");
*u = LLVMBuildAnd(builder, *u, mask, "u");
* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
*/
static void
-yuyv_to_yuv_soa(LLVMBuilderRef builder,
+yuyv_to_yuv_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i,
LLVMValueRef *u,
LLVMValueRef *v)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
- LLVMValueRef shift, mask;
+ LLVMValueRef mask;
memset(&type, 0, sizeof type);
type.width = 32;
assert(lp_check_value(type, i));
/*
+ * Little endian:
* y = (yuyv >> 16*i) & 0xff
* u = (yuyv >> 8 ) & 0xff
* v = (yuyv >> 24 ) & 0xff
+ *
+ * Big endian:
+ * y = (yuyv >> (-16*i + 24)) & 0xff
+ * u = (yuyv >> 16) & 0xff
+ * v = (yuyv) & 0xff
*/
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
- *y = LLVMBuildLShr(builder, packed, shift, "");
- *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
- *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shifting with a per-element count.
+ * x86 has no support for it, so it gets translated to roughly
+ * 5 instructions per element. Performance wasn't measured, but this
+ * cuts shader size by quite a bit (less of a difference if the cpu
+ * has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n > 1) {
+ LLVMValueRef sel, tmp;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, gallivm, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+ sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
+ *y = lp_build_select(&bld32, sel, packed, tmp);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
+#endif
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+ *v = packed;
+#endif
- mask = lp_build_const_int_vec(type, 0xff);
+ mask = lp_build_const_int_vec(gallivm, type, 0xff);
*y = LLVMBuildAnd(builder, *y, mask, "y");
*u = LLVMBuildAnd(builder, *u, mask, "u");
static INLINE void
-yuv_to_rgb_soa(LLVMBuilderRef builder,
+yuv_to_rgb_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
struct lp_build_context bld;
type.width = 32;
type.length = n;
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&bld, gallivm, type);
assert(lp_check_value(type, y));
assert(lp_check_value(type, u));
* Constants
*/
- c0 = lp_build_const_int_vec(type, 0);
- c8 = lp_build_const_int_vec(type, 8);
- c16 = lp_build_const_int_vec(type, 16);
- c128 = lp_build_const_int_vec(type, 128);
- c255 = lp_build_const_int_vec(type, 255);
+ c0 = lp_build_const_int_vec(gallivm, type, 0);
+ c8 = lp_build_const_int_vec(gallivm, type, 8);
+ c16 = lp_build_const_int_vec(gallivm, type, 16);
+ c128 = lp_build_const_int_vec(gallivm, type, 128);
+ c255 = lp_build_const_int_vec(gallivm, type, 255);
- cy = lp_build_const_int_vec(type, 298);
- cug = lp_build_const_int_vec(type, -100);
- cub = lp_build_const_int_vec(type, 516);
- cvr = lp_build_const_int_vec(type, 409);
- cvg = lp_build_const_int_vec(type, -208);
+ cy = lp_build_const_int_vec(gallivm, type, 298);
+ cug = lp_build_const_int_vec(gallivm, type, -100);
+ cub = lp_build_const_int_vec(gallivm, type, 516);
+ cvr = lp_build_const_int_vec(gallivm, type, 409);
+ cvg = lp_build_const_int_vec(gallivm, type, -208);
/*
* y -= 16;
static LLVMValueRef
-rgb_to_rgba_aos(LLVMBuilderRef builder,
+rgb_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
LLVMValueRef a;
LLVMValueRef rgba;
* Make a 4 x unorm8 vector
*/
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
r = r;
- g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
- b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
- a = lp_build_const_int_vec(type, 0xff000000);
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
+ a = lp_build_const_int_vec(gallivm, type, 0xff000000);
+#else
+ r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
+ a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
+#endif
rgba = r;
rgba = LLVMBuildOr(builder, rgba, g, "");
rgba = LLVMBuildOr(builder, rgba, a, "");
rgba = LLVMBuildBitCast(builder, rgba,
- LLVMVectorType(LLVMInt8Type(), 4*n), "");
+ LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
return rgba;
}
* Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-uyvy_to_rgba_aos(LLVMBuilderRef builder,
+uyvy_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
- yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-yuyv_to_rgba_aos(LLVMBuilderRef builder,
+yuyv_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
- yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-rgbg_to_rgba_aos(LLVMBuilderRef builder,
+rgbg_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-grgb_to_rgba_aos(LLVMBuilderRef builder,
+grgb_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
+/**
+ * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
+ *
+ * Reuses the UYVY channel extraction: the three values produced by
+ * uyvy_to_yuv_soa() are stored directly into r, g and b (in that
+ * argument order) and then packed by rgb_to_rgba_aos(). No YUV->RGB
+ * color conversion is applied on this path.
+ *
+ * @param gallivm  state handed through to both helpers
+ * @param n  number of pixels processed
+ * @param packed  <n x i32> vector holding the packed texel data
+ * @param i  <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return the vector built by rgb_to_rgba_aos() from r, g and b
+ */
+static LLVMValueRef
+grbr_to_rgba_aos(struct gallivm_state *gallivm,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); /* outputs appear to be ordered y, u, v (as in uyvy_to_rgba_aos): y -> r, u -> g, v -> b */
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
+ *
+ * Reuses the YUYV channel extraction: the three values produced by
+ * yuyv_to_yuv_soa() are stored directly into r, g and b (in that
+ * argument order) and then packed by rgb_to_rgba_aos(). No YUV->RGB
+ * color conversion is applied on this path.
+ *
+ * @param gallivm  state handed through to both helpers
+ * @param n  number of pixels processed
+ * @param packed  <n x i32> vector holding the packed texel data
+ * @param i  <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return the vector built by rgb_to_rgba_aos() from r, g and b
+ */
+static LLVMValueRef
+rgrb_to_rgba_aos(struct gallivm_state *gallivm,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); /* outputs appear to be ordered y, u, v (as in yuyv_to_rgba_aos): y -> r, u -> g, v -> b */
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
+
+ return rgba;
+}
/**
* @param n is the number of pixels processed
* @return a <4*n x i8> vector with the pixel RGBA values in AoS
*/
LLVMValueRef
-lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
unsigned n,
LLVMValueRef base_ptr,
assert(format_desc->block.width == 2);
assert(format_desc->block.height == 1);
- packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+ packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
(void)j;
switch (format_desc->format) {
case PIPE_FORMAT_UYVY:
- rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+ rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_YUYV:
- rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+ rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_R8G8_B8G8_UNORM:
- rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+ rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_G8R8_G8B8_UNORM:
- rgba = grgb_to_rgba_aos(builder, n, packed, i);
+ rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
+ break;
+ case PIPE_FORMAT_G8R8_B8R8_UNORM:
+ rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
+ break;
+ case PIPE_FORMAT_R8G8_R8B8_UNORM:
+ rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
break;
default:
assert(0);
- rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+ rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
break;
}