X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_format_yuv.c;h=cdf1956c093ec6f5a6ada5451bc700634a0cf0ae;hb=4a72d859b4f8d0444eb7f38606d59d7ddc9ea8fa;hp=d3eba50b77e551e2add702e2677502c2bc1732d1;hpb=bb1546f55be3b243b71d39e5fb7457c5b21e32c9;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index d3eba50b77e..cdf1956c093 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -35,18 +35,16 @@ #include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_string.h" +#include "util/u_cpu_detect.h" #include "lp_bld_arit.h" -#include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_format.h" - +#include "lp_bld_init.h" +#include "lp_bld_logic.h" /** * Extract Y, U, V channels from packed UYVY. @@ -54,7 +52,7 @@ * @param i is a vector with the x pixel coordinate (0 or 1) */ static void -uyvy_to_yuv_soa(LLVMBuilderRef builder, +uyvy_to_yuv_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i, @@ -62,8 +60,9 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *u, LLVMValueRef *v) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; - LLVMValueRef shift, mask; + LLVMValueRef mask; memset(&type, 0, sizeof type); type.width = 32; @@ -73,18 +72,41 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, assert(lp_check_value(type, i)); /* - * y = (uyvy >> 16*i) & 0xff + * y = (uyvy >> (16*i + 8)) & 0xff * u = (uyvy ) & 0xff * v = (uyvy >> 16 ) & 0xff */ - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); - shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), ""); - *y = LLVMBuildLShr(builder, packed, shift, ""); +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * Avoid shift with per-element count. + * No support on x86, gets translated to roughly 5 instructions + * per element. Didn't measure performance but cuts shader size + * by quite a bit (less difference if cpu has no sse4.1 support). + */ + if (util_cpu_caps.has_sse2 && n == 4) { + LLVMValueRef sel, tmp, tmp2; + struct lp_build_context bld32; + + lp_build_context_init(&bld32, gallivm, type); + + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); + tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); + sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); + *y = lp_build_select(&bld32, sel, tmp, tmp2); + } else +#endif + { + LLVMValueRef shift; + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); + shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + } + *u = packed; - *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); - mask = lp_build_const_int_vec(type, 0xff); + mask = lp_build_const_int_vec(gallivm, type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); @@ -98,7 +120,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder, * @param i is a vector with the x pixel coordinate (0 or 1) */ static void -yuyv_to_yuv_soa(LLVMBuilderRef builder, +yuyv_to_yuv_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i, @@ -106,8 +128,9 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, LLVMValueRef *u, LLVMValueRef *v) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; - LLVMValueRef shift, mask; + LLVMValueRef mask; memset(&type, 0, sizeof type); type.width = 32; @@ -122,12 +145,34 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, * v = (yuyv >> 24 ) & 0xff */ - shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), ""); - *y = LLVMBuildLShr(builder, packed, shift, ""); - *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), ""); - *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), ""); +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * Avoid shift with per-element count. + * No support on x86, gets translated to roughly 5 instructions + * per element. Didn't measure performance but cuts shader size + * by quite a bit (less difference if cpu has no sse4.1 support). + */ + if (util_cpu_caps.has_sse2 && n == 4) { + LLVMValueRef sel, tmp; + struct lp_build_context bld32; + + lp_build_context_init(&bld32, gallivm, type); + + tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); + sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); + *y = lp_build_select(&bld32, sel, packed, tmp); + } else +#endif + { + LLVMValueRef shift; + shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); + *y = LLVMBuildLShr(builder, packed, shift, ""); + } + + *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); + *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); - mask = lp_build_const_int_vec(type, 0xff); + mask = lp_build_const_int_vec(gallivm, type, 0xff); *y = LLVMBuildAnd(builder, *y, mask, "y"); *u = LLVMBuildAnd(builder, *u, mask, "u"); @@ -136,11 +181,12 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder, static INLINE void -yuv_to_rgb_soa(LLVMBuilderRef builder, +yuv_to_rgb_soa(struct gallivm_state *gallivm, unsigned n, LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; struct lp_build_context bld; @@ -161,7 +207,7 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, type.width = 32; type.length = n; - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&bld, gallivm, type); assert(lp_check_value(type, y)); assert(lp_check_value(type, u)); @@ -171,17 +217,17 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, * Constants */ - c0 = lp_build_const_int_vec(type, 0); - c8 = lp_build_const_int_vec(type, 8); - c16 = lp_build_const_int_vec(type, 16); - c128 = lp_build_const_int_vec(type, 128); - c255 = lp_build_const_int_vec(type, 255); + c0 = lp_build_const_int_vec(gallivm, type, 0); + c8 = lp_build_const_int_vec(gallivm, type, 8); + c16 = lp_build_const_int_vec(gallivm, type, 16); + c128 = lp_build_const_int_vec(gallivm, type, 128); + c255 = lp_build_const_int_vec(gallivm, type, 255); - cy = lp_build_const_int_vec(type, 298); - cug = lp_build_const_int_vec(type, -100); - cub = lp_build_const_int_vec(type, 516); - cvr = lp_build_const_int_vec(type, 409); - cvg = lp_build_const_int_vec(type, -208); + cy = lp_build_const_int_vec(gallivm, type, 298); + cug = lp_build_const_int_vec(gallivm, type, -100); + cub = lp_build_const_int_vec(gallivm, type, 516); + cvr = lp_build_const_int_vec(gallivm, type, 409); + cvg = lp_build_const_int_vec(gallivm, type, -208); /* * y -= 16; @@ -234,10 +280,11 @@ yuv_to_rgb_soa(LLVMBuilderRef builder, static LLVMValueRef -rgb_to_rgba_aos(LLVMBuilderRef builder, +rgb_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) { + LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef a; LLVMValueRef rgba; @@ -256,9 +303,9 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, */ r = r; - g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), ""); - b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), ""); - a = lp_build_const_int_vec(type, 0xff000000); + g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); + b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); + a = lp_build_const_int_vec(gallivm, type, 0xff000000); rgba = r; rgba = LLVMBuildOr(builder, rgba, g, ""); @@ -266,7 +313,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, rgba = LLVMBuildOr(builder, rgba, a, ""); rgba = LLVMBuildBitCast(builder, rgba, - LLVMVectorType(LLVMInt8Type(), 4*n), ""); + LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); return rgba; } @@ -276,7 +323,7 @@ rgb_to_rgba_aos(LLVMBuilderRef builder, * Convert from packed UYVY to <4n x i8> RGBA AoS */ static LLVMValueRef -uyvy_to_rgba_aos(LLVMBuilderRef builder, +uyvy_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -285,9 +332,9 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v); - yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -297,7 +344,7 @@ uyvy_to_rgba_aos(LLVMBuilderRef builder, * Convert from packed YUYV to <4n x i8> RGBA AoS */ static LLVMValueRef -yuyv_to_rgba_aos(LLVMBuilderRef builder, +yuyv_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -306,9 +353,9 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v); - yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); + yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -318,7 +365,7 @@ yuyv_to_rgba_aos(LLVMBuilderRef builder, * Convert from packed RG_BG to <4n x i8> RGBA AoS */ static LLVMValueRef -rgbg_to_rgba_aos(LLVMBuilderRef builder, +rgbg_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -326,8 +373,8 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -337,7 +384,7 @@ rgbg_to_rgba_aos(LLVMBuilderRef builder, * Convert from packed GR_GB to <4n x i8> RGBA AoS */ static LLVMValueRef -grgb_to_rgba_aos(LLVMBuilderRef builder, +grgb_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef packed, LLVMValueRef i) @@ -345,8 +392,8 @@ grgb_to_rgba_aos(LLVMBuilderRef builder, LLVMValueRef r, g, b; LLVMValueRef rgba; - yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b); - rgba = rgb_to_rgba_aos(builder, n, r, g, b); + yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); + rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); return rgba; } @@ -359,35 +406,42 @@ grgb_to_rgba_aos(LLVMBuilderRef builder, * @return a <4*n x i8> vector with the pixel RGBA values in AoS */ LLVMValueRef -lp_build_unpack_subsampled_to_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - unsigned n, - LLVMValueRef packed, - LLVMValueRef i, - LLVMValueRef j) +lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j) { + LLVMValueRef packed; LLVMValueRef rgba; assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); + assert(format_desc->block.bits == 32); + assert(format_desc->block.width == 2); + assert(format_desc->block.height == 1); + + packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset); (void)j; switch (format_desc->format) { case PIPE_FORMAT_UYVY: - rgba = uyvy_to_rgba_aos(builder, n, packed, i); + rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_YUYV: - rgba = yuyv_to_rgba_aos(builder, n, packed, i); + rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_R8G8_B8G8_UNORM: - rgba = rgbg_to_rgba_aos(builder, n, packed, i); + rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); break; case PIPE_FORMAT_G8R8_G8B8_UNORM: - rgba = grgb_to_rgba_aos(builder, n, packed, i); + rgba = grgb_to_rgba_aos(gallivm, n, packed, i); break; default: assert(0); - rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n)); + rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); break; }