src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * YUV pixel format manipulation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36
  37 #include "util/u_format.h"
  38 #include "util/u_cpu_detect.h"
  39
  40 #include "lp_bld_arit.h"
  41 #include "lp_bld_type.h"
  42 #include "lp_bld_const.h"
  43 #include "lp_bld_conv.h"
  44 #include "lp_bld_gather.h"
  45 #include "lp_bld_format.h"
  46 #include "lp_bld_init.h"
  47 #include "lp_bld_logic.h"
  48
  49 /**
  50  * Extract Y, U, V channels from packed UYVY.
  51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
  52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  53  */
  54 static void
  55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
  56                 unsigned n,
  57                 LLVMValueRef packed,
  58                 LLVMValueRef i,
  59                 LLVMValueRef *y,
  60                 LLVMValueRef *u,
  61                 LLVMValueRef *v)
  62 {
  63    LLVMBuilderRef builder = gallivm->builder;
  64    struct lp_type type;
  65    LLVMValueRef mask;
  66
  67    memset(&type, 0, sizeof type);
  68    type.width = 32;
  69    type.length = n;
  70
  71    assert(lp_check_value(type, packed));
  72    assert(lp_check_value(type, i));
  73
  74    /*
  75     * Little endian:
  76     * y = (uyvy >> (16*i + 8)) & 0xff
  77     * u = (uyvy        ) & 0xff
  78     * v = (uyvy >> 16  ) & 0xff
  79     *
  80     * Big endian:
  81     * y = (uyvy >> (-16*i + 16)) & 0xff
  82     * u = (uyvy >> 24) & 0xff
  83     * v = (uyvy >>  8) & 0xff
  84     */
  85
  86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  87    /*
  88     * Avoid shift with per-element count.
  89     * No support on x86, gets translated to roughly 5 instructions
  90     * per element. Didn't measure performance but cuts shader size
  91     * by quite a bit (less difference if cpu has no sse4.1 support).
  92     */
  93    if (util_cpu_caps.has_sse2 && n > 1) {
  94       LLVMValueRef sel, tmp, tmp2;
  95       struct lp_build_context bld32;
  96
  97       lp_build_context_init(&bld32, gallivm, type);
  98
  99       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
 100       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
 101       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
 102       *y = lp_build_select(&bld32, sel, tmp, tmp2);
 103    } else
 104 #endif
 105    {
 106       LLVMValueRef shift;
 107 #if PIPE_ARCH_LITTLE_ENDIAN
 108       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
 109       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
 110 #else
 111       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
 112       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
 113 #endif
 114       *y = LLVMBuildLShr(builder, packed, shift, "");
 115    }
 116
 117 #if PIPE_ARCH_LITTLE_ENDIAN
 118    *u = packed;
 119    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
 120 #else
 121    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
 122    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
 123 #endif
 124
 125    mask = lp_build_const_int_vec(gallivm, type, 0xff);
 126
 127    *y = LLVMBuildAnd(builder, *y, mask, "y");
 128    *u = LLVMBuildAnd(builder, *u, mask, "u");
 129    *v = LLVMBuildAnd(builder, *v, mask, "v");
 130 }
 131
 132
 133 /**
 134  * Extract Y, U, V channels from packed YUYV.
 135  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 136  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 137  */
 138 static void
 139 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
 140                 unsigned n,
 141                 LLVMValueRef packed,
 142                 LLVMValueRef i,
 143                 LLVMValueRef *y,
 144                 LLVMValueRef *u,
 145                 LLVMValueRef *v)
 146 {
 147    LLVMBuilderRef builder = gallivm->builder;
 148    struct lp_type type;
 149    LLVMValueRef mask;
 150
 151    memset(&type, 0, sizeof type);
 152    type.width = 32;
 153    type.length = n;
 154
 155    assert(lp_check_value(type, packed));
 156    assert(lp_check_value(type, i));
 157
 158    /*
 159    * Little endian:
 160     * y = (yuyv >> 16*i) & 0xff
 161     * u = (yuyv >> 8   ) & 0xff
 162     * v = (yuyv >> 24  ) & 0xff
 163     *
 164     * Big endian:
 165     * y = (yuyv >> (-16*i + 24) & 0xff
 166     * u = (yuyv >> 16)          & 0xff
 167     * v = (yuyv)                & 0xff
 168     */
 169
 170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 171    /*
 172     * Avoid shift with per-element count.
 173     * No support on x86, gets translated to roughly 5 instructions
 174     * per element. Didn't measure performance but cuts shader size
 175     * by quite a bit (less difference if cpu has no sse4.1 support).
 176     */
 177    if (util_cpu_caps.has_sse2 && n > 1) {
 178       LLVMValueRef sel, tmp;
 179       struct lp_build_context bld32;
 180
 181       lp_build_context_init(&bld32, gallivm, type);
 182
 183       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
 184       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
 185        *y = lp_build_select(&bld32, sel, packed, tmp);
 186    } else
 187 #endif
 188    {
 189       LLVMValueRef shift;
 190 #if PIPE_ARCH_LITTLE_ENDIAN
 191       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
 192 #else
 193       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
 194       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
 195 #endif
 196       *y = LLVMBuildLShr(builder, packed, shift, "");
 197    }
 198
 199 #if PIPE_ARCH_LITTLE_ENDIAN
 200    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
 201    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
 202 #else
 203    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
 204    *v = packed;
 205 #endif
 206
 207    mask = lp_build_const_int_vec(gallivm, type, 0xff);
 208
 209    *y = LLVMBuildAnd(builder, *y, mask, "y");
 210    *u = LLVMBuildAnd(builder, *u, mask, "u");
 211    *v = LLVMBuildAnd(builder, *v, mask, "v");
 212 }
 213
 214
 215 static inline void
 216 yuv_to_rgb_soa(struct gallivm_state *gallivm,
 217                unsigned n,
 218                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
 219                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
 220 {
 221    LLVMBuilderRef builder = gallivm->builder;
 222    struct lp_type type;
 223    struct lp_build_context bld;
 224
 225    LLVMValueRef c0;
 226    LLVMValueRef c8;
 227    LLVMValueRef c16;
 228    LLVMValueRef c128;
 229    LLVMValueRef c255;
 230
 231    LLVMValueRef cy;
 232    LLVMValueRef cug;
 233    LLVMValueRef cub;
 234    LLVMValueRef cvr;
 235    LLVMValueRef cvg;
 236
 237    memset(&type, 0, sizeof type);
 238    type.sign = TRUE;
 239    type.width = 32;
 240    type.length = n;
 241
 242    lp_build_context_init(&bld, gallivm, type);
 243
 244    assert(lp_check_value(type, y));
 245    assert(lp_check_value(type, u));
 246    assert(lp_check_value(type, v));
 247
 248    /*
 249     * Constants
 250     */
 251
 252    c0   = lp_build_const_int_vec(gallivm, type,   0);
 253    c8   = lp_build_const_int_vec(gallivm, type,   8);
 254    c16  = lp_build_const_int_vec(gallivm, type,  16);
 255    c128 = lp_build_const_int_vec(gallivm, type, 128);
 256    c255 = lp_build_const_int_vec(gallivm, type, 255);
 257
 258    cy  = lp_build_const_int_vec(gallivm, type,  298);
 259    cug = lp_build_const_int_vec(gallivm, type, -100);
 260    cub = lp_build_const_int_vec(gallivm, type,  516);
 261    cvr = lp_build_const_int_vec(gallivm, type,  409);
 262    cvg = lp_build_const_int_vec(gallivm, type, -208);
 263
 264    /*
 265     *  y -= 16;
 266     *  u -= 128;
 267     *  v -= 128;
 268     */
 269
 270    y = LLVMBuildSub(builder, y, c16, "");
 271    u = LLVMBuildSub(builder, u, c128, "");
 272    v = LLVMBuildSub(builder, v, c128, "");
 273
 274    /*
 275     * r = 298 * _y            + 409 * _v + 128;
 276     * g = 298 * _y - 100 * _u - 208 * _v + 128;
 277     * b = 298 * _y + 516 * _u            + 128;
 278     */
 279
 280    y = LLVMBuildMul(builder, y, cy, "");
 281    y = LLVMBuildAdd(builder, y, c128, "");
 282
 283    *r = LLVMBuildMul(builder, v, cvr, "");
 284    *g = LLVMBuildAdd(builder,
 285                      LLVMBuildMul(builder, u, cug, ""),
 286                      LLVMBuildMul(builder, v, cvg, ""),
 287                      "");
 288    *b = LLVMBuildMul(builder, u, cub, "");
 289
 290    *r = LLVMBuildAdd(builder, *r, y, "");
 291    *g = LLVMBuildAdd(builder, *g, y, "");
 292    *b = LLVMBuildAdd(builder, *b, y, "");
 293
 294    /*
 295     * r >>= 8;
 296     * g >>= 8;
 297     * b >>= 8;
 298     */
 299
 300    *r = LLVMBuildAShr(builder, *r, c8, "r");
 301    *g = LLVMBuildAShr(builder, *g, c8, "g");
 302    *b = LLVMBuildAShr(builder, *b, c8, "b");
 303
 304    /*
 305     * Clamp
 306     */
 307
 308    *r = lp_build_clamp(&bld, *r, c0, c255);
 309    *g = lp_build_clamp(&bld, *g, c0, c255);
 310    *b = lp_build_clamp(&bld, *b, c0, c255);
 311 }
 312
 313
 314 static LLVMValueRef
 315 rgb_to_rgba_aos(struct gallivm_state *gallivm,
 316                 unsigned n,
 317                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
 318 {
 319    LLVMBuilderRef builder = gallivm->builder;
 320    struct lp_type type;
 321    LLVMValueRef a;
 322    LLVMValueRef rgba;
 323
 324    memset(&type, 0, sizeof type);
 325    type.sign = TRUE;
 326    type.width = 32;
 327    type.length = n;
 328
 329    assert(lp_check_value(type, r));
 330    assert(lp_check_value(type, g));
 331    assert(lp_check_value(type, b));
 332
 333    /*
 334     * Make a 4 x unorm8 vector
 335     */
 336
 337 #if PIPE_ARCH_LITTLE_ENDIAN
 338    r = r;
 339    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
 340    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
 341    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
 342 #else
 343    r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
 344    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
 345    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
 346    a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
 347 #endif
 348
 349    rgba = r;
 350    rgba = LLVMBuildOr(builder, rgba, g, "");
 351    rgba = LLVMBuildOr(builder, rgba, b, "");
 352    rgba = LLVMBuildOr(builder, rgba, a, "");
 353
 354    rgba = LLVMBuildBitCast(builder, rgba,
 355                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
 356
 357    return rgba;
 358 }
 359
 360
 361 /**
 362  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
 363  */
 364 static LLVMValueRef
 365 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
 366                  unsigned n,
 367                  LLVMValueRef packed,
 368                  LLVMValueRef i)
 369 {
 370    LLVMValueRef y, u, v;
 371    LLVMValueRef r, g, b;
 372    LLVMValueRef rgba;
 373
 374    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
 375    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
 376    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 377
 378    return rgba;
 379 }
 380
 381
 382 /**
 383  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
 384  */
 385 static LLVMValueRef
 386 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
 387                  unsigned n,
 388                  LLVMValueRef packed,
 389                  LLVMValueRef i)
 390 {
 391    LLVMValueRef y, u, v;
 392    LLVMValueRef r, g, b;
 393    LLVMValueRef rgba;
 394
 395    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
 396    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
 397    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 398
 399    return rgba;
 400 }
 401
 402
 403 /**
 404  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
 405  */
 406 static LLVMValueRef
 407 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
 408                  unsigned n,
 409                  LLVMValueRef packed,
 410                  LLVMValueRef i)
 411 {
 412    LLVMValueRef r, g, b;
 413    LLVMValueRef rgba;
 414
 415    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
 416    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 417
 418    return rgba;
 419 }
 420
 421
 422 /**
 423  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
 424  */
 425 static LLVMValueRef
 426 grgb_to_rgba_aos(struct gallivm_state *gallivm,
 427                  unsigned n,
 428                  LLVMValueRef packed,
 429                  LLVMValueRef i)
 430 {
 431    LLVMValueRef r, g, b;
 432    LLVMValueRef rgba;
 433
 434    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
 435    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 436
 437    return rgba;
 438 }
 439
 440 /**
 441  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
 442  */
 443 static LLVMValueRef
 444 grbr_to_rgba_aos(struct gallivm_state *gallivm,
 445                  unsigned n,
 446                  LLVMValueRef packed,
 447                  LLVMValueRef i)
 448 {
 449    LLVMValueRef r, g, b;
 450    LLVMValueRef rgba;
 451
 452    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
 453    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 454
 455    return rgba;
 456 }
 457
 458
 459 /**
 460  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
 461  */
 462 static LLVMValueRef
 463 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
 464                  unsigned n,
 465                  LLVMValueRef packed,
 466                  LLVMValueRef i)
 467 {
 468    LLVMValueRef r, g, b;
 469    LLVMValueRef rgba;
 470
 471    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
 472    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 473
 474    return rgba;
 475 }
 476
 477 /**
 478  * @param n  is the number of pixels processed
 479  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 480  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 481  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
 482  */
 483 LLVMValueRef
 484 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
 485                                    const struct util_format_description *format_desc,
 486                                    unsigned n,
 487                                    LLVMValueRef base_ptr,
 488                                    LLVMValueRef offset,
 489                                    LLVMValueRef i,
 490                                    LLVMValueRef j)
 491 {
 492    LLVMValueRef packed;
 493    LLVMValueRef rgba;
 494    struct lp_type fetch_type;
 495
 496    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
 497    assert(format_desc->block.bits == 32);
 498    assert(format_desc->block.width == 2);
 499    assert(format_desc->block.height == 1);
 500
 501    fetch_type = lp_type_uint(32);
 502    packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
 503
 504    (void)j;
 505
 506    switch (format_desc->format) {
 507    case PIPE_FORMAT_UYVY:
 508       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
 509       break;
 510    case PIPE_FORMAT_YUYV:
 511       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
 512       break;
 513    case PIPE_FORMAT_R8G8_B8G8_UNORM:
 514       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
 515       break;
 516    case PIPE_FORMAT_G8R8_G8B8_UNORM:
 517       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
 518       break;
 519    case PIPE_FORMAT_G8R8_B8R8_UNORM:
 520       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
 521       break;
 522    case PIPE_FORMAT_R8G8_R8B8_UNORM:
 523       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
 524       break;
 525    default:
 526       assert(0);
 527       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
 528       break;
 529    }
 530
 531    return rgba;
 532 }
 533