*/
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_arit.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
+#include "lp_bld_init.h"
#include "lp_bld_logic.h"
/**
* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
*/
static void
-uyvy_to_yuv_soa(LLVMBuilderRef builder,
+uyvy_to_yuv_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i,
LLVMValueRef *u,
LLVMValueRef *v)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
LLVMValueRef mask;
assert(lp_check_value(type, i));
/*
+ * Little endian:
* y = (uyvy >> (16*i + 8)) & 0xff
* u = (uyvy ) & 0xff
* v = (uyvy >> 16 ) & 0xff
+ *
+ * Big endian:
+ * y = (uyvy >> (-16*i + 16)) & 0xff
+ * u = (uyvy >> 24) & 0xff
+ * v = (uyvy >> 8) & 0xff
*/
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
- if (util_cpu_caps.has_sse2 && n == 4) {
+ if (util_cpu_caps.has_sse2 && n > 1) {
LLVMValueRef sel, tmp, tmp2;
struct lp_build_context bld32;
- lp_build_context_init(&bld32, builder, type);
+ lp_build_context_init(&bld32, gallivm, type);
- tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
- tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
- sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+ tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
+ sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
*y = lp_build_select(&bld32, sel, tmp, tmp2);
} else
#endif
{
LLVMValueRef shift;
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
- shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
+#endif
*y = LLVMBuildLShr(builder, packed, shift, "");
}
+#if UTIL_ARCH_LITTLE_ENDIAN
*u = packed;
- *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+#endif
- mask = lp_build_const_int_vec(type, 0xff);
+ mask = lp_build_const_int_vec(gallivm, type, 0xff);
*y = LLVMBuildAnd(builder, *y, mask, "y");
*u = LLVMBuildAnd(builder, *u, mask, "u");
* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
*/
static void
-yuyv_to_yuv_soa(LLVMBuilderRef builder,
+yuyv_to_yuv_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i,
LLVMValueRef *u,
LLVMValueRef *v)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
LLVMValueRef mask;
assert(lp_check_value(type, i));
/*
+ * Little endian:
* y = (yuyv >> 16*i) & 0xff
* u = (yuyv >> 8 ) & 0xff
* v = (yuyv >> 24 ) & 0xff
+ *
+ * Big endian:
+ * y = (yuyv >> (-16*i + 24)) & 0xff
+ * u = (yuyv >> 16) & 0xff
+ * v = (yuyv) & 0xff
*/
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
* per element. Didn't measure performance but cuts shader size
* by quite a bit (less difference if cpu has no sse4.1 support).
*/
- if (util_cpu_caps.has_sse2 && n == 4) {
+ if (util_cpu_caps.has_sse2 && n > 1) {
LLVMValueRef sel, tmp;
struct lp_build_context bld32;
- lp_build_context_init(&bld32, builder, type);
+ lp_build_context_init(&bld32, gallivm, type);
- tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
- sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+ sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
*y = lp_build_select(&bld32, sel, packed, tmp);
} else
#endif
{
LLVMValueRef shift;
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
+#else
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
+#endif
*y = LLVMBuildLShr(builder, packed, shift, "");
}
- *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
- *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+#if UTIL_ARCH_LITTLE_ENDIAN
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
+#else
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
+ *v = packed;
+#endif
- mask = lp_build_const_int_vec(type, 0xff);
+ mask = lp_build_const_int_vec(gallivm, type, 0xff);
*y = LLVMBuildAnd(builder, *y, mask, "y");
*u = LLVMBuildAnd(builder, *u, mask, "u");
}
-static INLINE void
-yuv_to_rgb_soa(LLVMBuilderRef builder,
+static inline void
+yuv_to_rgb_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
struct lp_build_context bld;
type.width = 32;
type.length = n;
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&bld, gallivm, type);
assert(lp_check_value(type, y));
assert(lp_check_value(type, u));
* Constants
*/
- c0 = lp_build_const_int_vec(type, 0);
- c8 = lp_build_const_int_vec(type, 8);
- c16 = lp_build_const_int_vec(type, 16);
- c128 = lp_build_const_int_vec(type, 128);
- c255 = lp_build_const_int_vec(type, 255);
+ c0 = lp_build_const_int_vec(gallivm, type, 0);
+ c8 = lp_build_const_int_vec(gallivm, type, 8);
+ c16 = lp_build_const_int_vec(gallivm, type, 16);
+ c128 = lp_build_const_int_vec(gallivm, type, 128);
+ c255 = lp_build_const_int_vec(gallivm, type, 255);
- cy = lp_build_const_int_vec(type, 298);
- cug = lp_build_const_int_vec(type, -100);
- cub = lp_build_const_int_vec(type, 516);
- cvr = lp_build_const_int_vec(type, 409);
- cvg = lp_build_const_int_vec(type, -208);
+ cy = lp_build_const_int_vec(gallivm, type, 298);
+ cug = lp_build_const_int_vec(gallivm, type, -100);
+ cub = lp_build_const_int_vec(gallivm, type, 516);
+ cvr = lp_build_const_int_vec(gallivm, type, 409);
+ cvg = lp_build_const_int_vec(gallivm, type, -208);
/*
* y -= 16;
static LLVMValueRef
-rgb_to_rgba_aos(LLVMBuilderRef builder,
+rgb_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_type type;
LLVMValueRef a;
LLVMValueRef rgba;
* Make a 4 x unorm8 vector
*/
+#if UTIL_ARCH_LITTLE_ENDIAN
r = r;
- g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
- b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
- a = lp_build_const_int_vec(type, 0xff000000);
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
+ a = lp_build_const_int_vec(gallivm, type, 0xff000000);
+#else
+ r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
+ a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
+#endif
rgba = r;
rgba = LLVMBuildOr(builder, rgba, g, "");
rgba = LLVMBuildOr(builder, rgba, a, "");
rgba = LLVMBuildBitCast(builder, rgba,
- LLVMVectorType(LLVMInt8Type(), 4*n), "");
+ LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
return rgba;
}
* Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-uyvy_to_rgba_aos(LLVMBuilderRef builder,
+uyvy_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
- yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-yuyv_to_rgba_aos(LLVMBuilderRef builder,
+yuyv_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
- yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-rgbg_to_rgba_aos(LLVMBuilderRef builder,
+rgbg_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
* Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
*/
static LLVMValueRef
-grgb_to_rgba_aos(LLVMBuilderRef builder,
+grgb_to_rgba_aos(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef packed,
LLVMValueRef i)
LLVMValueRef r, g, b;
LLVMValueRef rgba;
- yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
- rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
+
+ return rgba;
+}
+
+/**
+ * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
+ *
+ * Reuses the UYVY unpacker with its (y, u, v) outputs bound to (r, g, b):
+ * red comes from the per-pixel component selected by i, while green and
+ * blue come from the two components that do not depend on i and are
+ * therefore shared by both pixels of the 2x1 block.
+ *
+ * @param n is the number of pixels processed
+ * @param packed is a <n x i32> vector with the packed blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return a <4*n x i8> vector with the pixel RGBA values in AoS
+ */
+static LLVMValueRef
+grbr_to_rgba_aos(struct gallivm_state *gallivm,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
return rgba;
}
+/**
+ * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
+ *
+ * Reuses the YUYV unpacker with its (y, u, v) outputs bound to (r, g, b):
+ * red comes from the per-pixel component selected by i, while green and
+ * blue come from the two components that do not depend on i and are
+ * therefore shared by both pixels of the 2x1 block.
+ *
+ * @param n is the number of pixels processed
+ * @param packed is a <n x i32> vector with the packed blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return a <4*n x i8> vector with the pixel RGBA values in AoS
+ */
+static LLVMValueRef
+rgrb_to_rgba_aos(struct gallivm_state *gallivm,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
+
+ return rgba;
+}
+
/**
* @param n is the number of pixels processed
* @param packed is a <n x i32> vector with the packed YUYV blocks
* @return a <4*n x i8> vector with the pixel RGBA values in AoS
*/
LLVMValueRef
-lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
unsigned n,
LLVMValueRef base_ptr,
{
LLVMValueRef packed;
LLVMValueRef rgba;
+ struct lp_type fetch_type;
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
assert(format_desc->block.bits == 32);
assert(format_desc->block.width == 2);
assert(format_desc->block.height == 1);
- packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+ fetch_type = lp_type_uint(32);
+ packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
(void)j;
switch (format_desc->format) {
case PIPE_FORMAT_UYVY:
- rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+ rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_YUYV:
- rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+ rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_R8G8_B8G8_UNORM:
- rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+ rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
break;
case PIPE_FORMAT_G8R8_G8B8_UNORM:
- rgba = grgb_to_rgba_aos(builder, n, packed, i);
+ rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
+ break;
+ case PIPE_FORMAT_G8R8_B8R8_UNORM:
+ rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
+ break;
+ case PIPE_FORMAT_R8G8_R8B8_UNORM:
+ rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
break;
default:
assert(0);
- rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+ rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
break;
}