src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * YUV pixel format manipulation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36
  37 #include "util/u_format.h"
  38 #include "util/u_cpu_detect.h"
  39
  40 #include "lp_bld_arit.h"
  41 #include "lp_bld_type.h"
  42 #include "lp_bld_const.h"
  43 #include "lp_bld_conv.h"
  44 #include "lp_bld_gather.h"
  45 #include "lp_bld_format.h"
  46 #include "lp_bld_logic.h"
  47
  48 /**
  49  * Extract Y, U, V channels from packed UYVY.
  50  * @param packed  is a <n x i32> vector with the packed UYVY blocks
  51  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  52  */
  53 static void
  54 uyvy_to_yuv_soa(LLVMBuilderRef builder,
  55                 unsigned n,
  56                 LLVMValueRef packed,
  57                 LLVMValueRef i,
  58                 LLVMValueRef *y,
  59                 LLVMValueRef *u,
  60                 LLVMValueRef *v)
  61 {
  62    struct lp_type type;
  63    LLVMValueRef mask;
  64
  65    memset(&type, 0, sizeof type);
  66    type.width = 32;
  67    type.length = n;
  68
  69    assert(lp_check_value(type, packed));
  70    assert(lp_check_value(type, i));
  71
  72    /*
  73     * y = (uyvy >> (16*i + 8)) & 0xff
  74     * u = (uyvy        ) & 0xff
  75     * v = (uyvy >> 16  ) & 0xff
  76     */
  77
  78 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  79    /*
  80     * Avoid shift with per-element count.
  81     * No support on x86, gets translated to roughly 5 instructions
  82     * per element. Didn't measure performance but cuts shader size
  83     * by quite a bit (less difference if cpu has no sse4.1 support).
  84     */
  85    if (util_cpu_caps.has_sse2 && n == 4) {
  86       LLVMValueRef sel, tmp, tmp2;
  87       struct lp_build_context bld32;
  88
  89       lp_build_context_init(&bld32, builder, type);
  90
  91       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
  92       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
  93       sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
  94       *y = lp_build_select(&bld32, sel, tmp, tmp2);
  95    } else
  96 #endif
  97    {
  98       LLVMValueRef shift;
  99       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
 100       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
 101       *y = LLVMBuildLShr(builder, packed, shift, "");
 102    }
 103
 104    *u = packed;
 105    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
 106
 107    mask = lp_build_const_int_vec(type, 0xff);
 108
 109    *y = LLVMBuildAnd(builder, *y, mask, "y");
 110    *u = LLVMBuildAnd(builder, *u, mask, "u");
 111    *v = LLVMBuildAnd(builder, *v, mask, "v");
 112 }
 113
 114
 115 /**
 116  * Extract Y, U, V channels from packed YUYV.
 117  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 118  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 119  */
 120 static void
 121 yuyv_to_yuv_soa(LLVMBuilderRef builder,
 122                 unsigned n,
 123                 LLVMValueRef packed,
 124                 LLVMValueRef i,
 125                 LLVMValueRef *y,
 126                 LLVMValueRef *u,
 127                 LLVMValueRef *v)
 128 {
 129    struct lp_type type;
 130    LLVMValueRef mask;
 131
 132    memset(&type, 0, sizeof type);
 133    type.width = 32;
 134    type.length = n;
 135
 136    assert(lp_check_value(type, packed));
 137    assert(lp_check_value(type, i));
 138
 139    /*
 140     * y = (yuyv >> 16*i) & 0xff
 141     * u = (yuyv >> 8   ) & 0xff
 142     * v = (yuyv >> 24  ) & 0xff
 143     */
 144
 145 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 146    /*
 147     * Avoid shift with per-element count.
 148     * No support on x86, gets translated to roughly 5 instructions
 149     * per element. Didn't measure performance but cuts shader size
 150     * by quite a bit (less difference if cpu has no sse4.1 support).
 151     */
 152    if (util_cpu_caps.has_sse2 && n == 4) {
 153       LLVMValueRef sel, tmp;
 154       struct lp_build_context bld32;
 155
 156       lp_build_context_init(&bld32, builder, type);
 157
 158       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
 159       sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
 160        *y = lp_build_select(&bld32, sel, packed, tmp);
 161    } else
 162 #endif
 163    {
 164       LLVMValueRef shift;
 165       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
 166       *y = LLVMBuildLShr(builder, packed, shift, "");
 167    }
 168
 169    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
 170    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
 171
 172    mask = lp_build_const_int_vec(type, 0xff);
 173
 174    *y = LLVMBuildAnd(builder, *y, mask, "y");
 175    *u = LLVMBuildAnd(builder, *u, mask, "u");
 176    *v = LLVMBuildAnd(builder, *v, mask, "v");
 177 }
 178
 179
 180 static INLINE void
 181 yuv_to_rgb_soa(LLVMBuilderRef builder,
 182                unsigned n,
 183                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
 184                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
 185 {
 186    struct lp_type type;
 187    struct lp_build_context bld;
 188
 189    LLVMValueRef c0;
 190    LLVMValueRef c8;
 191    LLVMValueRef c16;
 192    LLVMValueRef c128;
 193    LLVMValueRef c255;
 194
 195    LLVMValueRef cy;
 196    LLVMValueRef cug;
 197    LLVMValueRef cub;
 198    LLVMValueRef cvr;
 199    LLVMValueRef cvg;
 200
 201    memset(&type, 0, sizeof type);
 202    type.sign = TRUE;
 203    type.width = 32;
 204    type.length = n;
 205
 206    lp_build_context_init(&bld, builder, type);
 207
 208    assert(lp_check_value(type, y));
 209    assert(lp_check_value(type, u));
 210    assert(lp_check_value(type, v));
 211
 212    /*
 213     * Constants
 214     */
 215
 216    c0   = lp_build_const_int_vec(type,   0);
 217    c8   = lp_build_const_int_vec(type,   8);
 218    c16  = lp_build_const_int_vec(type,  16);
 219    c128 = lp_build_const_int_vec(type, 128);
 220    c255 = lp_build_const_int_vec(type, 255);
 221
 222    cy  = lp_build_const_int_vec(type,  298);
 223    cug = lp_build_const_int_vec(type, -100);
 224    cub = lp_build_const_int_vec(type,  516);
 225    cvr = lp_build_const_int_vec(type,  409);
 226    cvg = lp_build_const_int_vec(type, -208);
 227
 228    /*
 229     *  y -= 16;
 230     *  u -= 128;
 231     *  v -= 128;
 232     */
 233
 234    y = LLVMBuildSub(builder, y, c16, "");
 235    u = LLVMBuildSub(builder, u, c128, "");
 236    v = LLVMBuildSub(builder, v, c128, "");
 237
 238    /*
 239     * r = 298 * _y            + 409 * _v + 128;
 240     * g = 298 * _y - 100 * _u - 208 * _v + 128;
 241     * b = 298 * _y + 516 * _u            + 128;
 242     */
 243
 244    y = LLVMBuildMul(builder, y, cy, "");
 245    y = LLVMBuildAdd(builder, y, c128, "");
 246
 247    *r = LLVMBuildMul(builder, v, cvr, "");
 248    *g = LLVMBuildAdd(builder,
 249                      LLVMBuildMul(builder, u, cug, ""),
 250                      LLVMBuildMul(builder, v, cvg, ""),
 251                      "");
 252    *b = LLVMBuildMul(builder, u, cub, "");
 253
 254    *r = LLVMBuildAdd(builder, *r, y, "");
 255    *g = LLVMBuildAdd(builder, *g, y, "");
 256    *b = LLVMBuildAdd(builder, *b, y, "");
 257
 258    /*
 259     * r >>= 8;
 260     * g >>= 8;
 261     * b >>= 8;
 262     */
 263
 264    *r = LLVMBuildAShr(builder, *r, c8, "r");
 265    *g = LLVMBuildAShr(builder, *g, c8, "g");
 266    *b = LLVMBuildAShr(builder, *b, c8, "b");
 267
 268    /*
 269     * Clamp
 270     */
 271
 272    *r = lp_build_clamp(&bld, *r, c0, c255);
 273    *g = lp_build_clamp(&bld, *g, c0, c255);
 274    *b = lp_build_clamp(&bld, *b, c0, c255);
 275 }
 276
 277
 278 static LLVMValueRef
 279 rgb_to_rgba_aos(LLVMBuilderRef builder,
 280                 unsigned n,
 281                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
 282 {
 283    struct lp_type type;
 284    LLVMValueRef a;
 285    LLVMValueRef rgba;
 286
 287    memset(&type, 0, sizeof type);
 288    type.sign = TRUE;
 289    type.width = 32;
 290    type.length = n;
 291
 292    assert(lp_check_value(type, r));
 293    assert(lp_check_value(type, g));
 294    assert(lp_check_value(type, b));
 295
 296    /*
 297     * Make a 4 x unorm8 vector
 298     */
 299
 300    r = r;
 301    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
 302    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
 303    a = lp_build_const_int_vec(type, 0xff000000);
 304
 305    rgba = r;
 306    rgba = LLVMBuildOr(builder, rgba, g, "");
 307    rgba = LLVMBuildOr(builder, rgba, b, "");
 308    rgba = LLVMBuildOr(builder, rgba, a, "");
 309
 310    rgba = LLVMBuildBitCast(builder, rgba,
 311                            LLVMVectorType(LLVMInt8Type(), 4*n), "");
 312
 313    return rgba;
 314 }
 315
 316
 317 /**
 318  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
 319  */
 320 static LLVMValueRef
 321 uyvy_to_rgba_aos(LLVMBuilderRef builder,
 322                  unsigned n,
 323                  LLVMValueRef packed,
 324                  LLVMValueRef i)
 325 {
 326    LLVMValueRef y, u, v;
 327    LLVMValueRef r, g, b;
 328    LLVMValueRef rgba;
 329
 330    uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
 331    yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
 332    rgba = rgb_to_rgba_aos(builder, n, r, g, b);
 333
 334    return rgba;
 335 }
 336
 337
 338 /**
 339  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
 340  */
 341 static LLVMValueRef
 342 yuyv_to_rgba_aos(LLVMBuilderRef builder,
 343                  unsigned n,
 344                  LLVMValueRef packed,
 345                  LLVMValueRef i)
 346 {
 347    LLVMValueRef y, u, v;
 348    LLVMValueRef r, g, b;
 349    LLVMValueRef rgba;
 350
 351    yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
 352    yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
 353    rgba = rgb_to_rgba_aos(builder, n, r, g, b);
 354
 355    return rgba;
 356 }
 357
 358
 359 /**
 360  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
 361  */
 362 static LLVMValueRef
 363 rgbg_to_rgba_aos(LLVMBuilderRef builder,
 364                  unsigned n,
 365                  LLVMValueRef packed,
 366                  LLVMValueRef i)
 367 {
 368    LLVMValueRef r, g, b;
 369    LLVMValueRef rgba;
 370
 371    uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
 372    rgba = rgb_to_rgba_aos(builder, n, r, g, b);
 373
 374    return rgba;
 375 }
 376
 377
 378 /**
 379  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
 380  */
 381 static LLVMValueRef
 382 grgb_to_rgba_aos(LLVMBuilderRef builder,
 383                  unsigned n,
 384                  LLVMValueRef packed,
 385                  LLVMValueRef i)
 386 {
 387    LLVMValueRef r, g, b;
 388    LLVMValueRef rgba;
 389
 390    yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
 391    rgba = rgb_to_rgba_aos(builder, n, r, g, b);
 392
 393    return rgba;
 394 }
 395
 396
 397 /**
 398  * @param n  is the number of pixels processed
 399  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 400  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 401  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
 402  */
 403 LLVMValueRef
 404 lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
 405                                    const struct util_format_description *format_desc,
 406                                    unsigned n,
 407                                    LLVMValueRef base_ptr,
 408                                    LLVMValueRef offset,
 409                                    LLVMValueRef i,
 410                                    LLVMValueRef j)
 411 {
 412    LLVMValueRef packed;
 413    LLVMValueRef rgba;
 414
 415    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
 416    assert(format_desc->block.bits == 32);
 417    assert(format_desc->block.width == 2);
 418    assert(format_desc->block.height == 1);
 419
 420    packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
 421
 422    (void)j;
 423
 424    switch (format_desc->format) {
 425    case PIPE_FORMAT_UYVY:
 426       rgba = uyvy_to_rgba_aos(builder, n, packed, i);
 427       break;
 428    case PIPE_FORMAT_YUYV:
 429       rgba = yuyv_to_rgba_aos(builder, n, packed, i);
 430       break;
 431    case PIPE_FORMAT_R8G8_B8G8_UNORM:
 432       rgba = rgbg_to_rgba_aos(builder, n, packed, i);
 433       break;
 434    case PIPE_FORMAT_G8R8_G8B8_UNORM:
 435       rgba = grgb_to_rgba_aos(builder, n, packed, i);
 436       break;
 437    default:
 438       assert(0);
 439       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
 440       break;
 441    }
 442
 443    return rgba;
 444 }
 445