src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * YUV pixel format manipulation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36
  37 #include "util/u_format.h"
  38 #include "util/u_cpu_detect.h"
  39
  40 #include "lp_bld_arit.h"
  41 #include "lp_bld_type.h"
  42 #include "lp_bld_const.h"
  43 #include "lp_bld_conv.h"
  44 #include "lp_bld_gather.h"
  45 #include "lp_bld_format.h"
  46 #include "lp_bld_init.h"
  47 #include "lp_bld_logic.h"
  48
  49 /**
  50  * Extract Y, U, V channels from packed UYVY.
  51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
  52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
  53  */
  54 static void
  55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
  56                 unsigned n,
  57                 LLVMValueRef packed,
  58                 LLVMValueRef i,
  59                 LLVMValueRef *y,
  60                 LLVMValueRef *u,
  61                 LLVMValueRef *v)
  62 {
  63    LLVMBuilderRef builder = gallivm->builder;
  64    struct lp_type type;
  65    LLVMValueRef mask;
  66
  67    memset(&type, 0, sizeof type);
  68    type.width = 32;
  69    type.length = n;
  70
  71    assert(lp_check_value(type, packed));
  72    assert(lp_check_value(type, i));
  73
  74    /*
  75     * y = (uyvy >> (16*i + 8)) & 0xff
  76     * u = (uyvy        ) & 0xff
  77     * v = (uyvy >> 16  ) & 0xff
  78     */
  79
  80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  81    /*
  82     * Avoid shift with per-element count.
  83     * No support on x86, gets translated to roughly 5 instructions
  84     * per element. Didn't measure performance but cuts shader size
  85     * by quite a bit (less difference if cpu has no sse4.1 support).
  86     */
  87    if (util_cpu_caps.has_sse2 && n == 4) {
  88       LLVMValueRef sel, tmp, tmp2;
  89       struct lp_build_context bld32;
  90
  91       lp_build_context_init(&bld32, gallivm, type);
  92
  93       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
  94       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
  95       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
  96       *y = lp_build_select(&bld32, sel, tmp, tmp2);
  97    } else
  98 #endif
  99    {
 100       LLVMValueRef shift;
 101       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
 102       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
 103       *y = LLVMBuildLShr(builder, packed, shift, "");
 104    }
 105
 106    *u = packed;
 107    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
 108
 109    mask = lp_build_const_int_vec(gallivm, type, 0xff);
 110
 111    *y = LLVMBuildAnd(builder, *y, mask, "y");
 112    *u = LLVMBuildAnd(builder, *u, mask, "u");
 113    *v = LLVMBuildAnd(builder, *v, mask, "v");
 114 }
 115
 116
 117 /**
 118  * Extract Y, U, V channels from packed YUYV.
 119  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 120  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 121  */
 122 static void
 123 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
 124                 unsigned n,
 125                 LLVMValueRef packed,
 126                 LLVMValueRef i,
 127                 LLVMValueRef *y,
 128                 LLVMValueRef *u,
 129                 LLVMValueRef *v)
 130 {
 131    LLVMBuilderRef builder = gallivm->builder;
 132    struct lp_type type;
 133    LLVMValueRef mask;
 134
 135    memset(&type, 0, sizeof type);
 136    type.width = 32;
 137    type.length = n;
 138
 139    assert(lp_check_value(type, packed));
 140    assert(lp_check_value(type, i));
 141
 142    /*
 143     * y = (yuyv >> 16*i) & 0xff
 144     * u = (yuyv >> 8   ) & 0xff
 145     * v = (yuyv >> 24  ) & 0xff
 146     */
 147
 148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 149    /*
 150     * Avoid shift with per-element count.
 151     * No support on x86, gets translated to roughly 5 instructions
 152     * per element. Didn't measure performance but cuts shader size
 153     * by quite a bit (less difference if cpu has no sse4.1 support).
 154     */
 155    if (util_cpu_caps.has_sse2 && n == 4) {
 156       LLVMValueRef sel, tmp;
 157       struct lp_build_context bld32;
 158
 159       lp_build_context_init(&bld32, gallivm, type);
 160
 161       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
 162       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
 163        *y = lp_build_select(&bld32, sel, packed, tmp);
 164    } else
 165 #endif
 166    {
 167       LLVMValueRef shift;
 168       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
 169       *y = LLVMBuildLShr(builder, packed, shift, "");
 170    }
 171
 172    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
 173    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
 174
 175    mask = lp_build_const_int_vec(gallivm, type, 0xff);
 176
 177    *y = LLVMBuildAnd(builder, *y, mask, "y");
 178    *u = LLVMBuildAnd(builder, *u, mask, "u");
 179    *v = LLVMBuildAnd(builder, *v, mask, "v");
 180 }
 181
 182
 183 static INLINE void
 184 yuv_to_rgb_soa(struct gallivm_state *gallivm,
 185                unsigned n,
 186                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
 187                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
 188 {
 189    LLVMBuilderRef builder = gallivm->builder;
 190    struct lp_type type;
 191    struct lp_build_context bld;
 192
 193    LLVMValueRef c0;
 194    LLVMValueRef c8;
 195    LLVMValueRef c16;
 196    LLVMValueRef c128;
 197    LLVMValueRef c255;
 198
 199    LLVMValueRef cy;
 200    LLVMValueRef cug;
 201    LLVMValueRef cub;
 202    LLVMValueRef cvr;
 203    LLVMValueRef cvg;
 204
 205    memset(&type, 0, sizeof type);
 206    type.sign = TRUE;
 207    type.width = 32;
 208    type.length = n;
 209
 210    lp_build_context_init(&bld, gallivm, type);
 211
 212    assert(lp_check_value(type, y));
 213    assert(lp_check_value(type, u));
 214    assert(lp_check_value(type, v));
 215
 216    /*
 217     * Constants
 218     */
 219
 220    c0   = lp_build_const_int_vec(gallivm, type,   0);
 221    c8   = lp_build_const_int_vec(gallivm, type,   8);
 222    c16  = lp_build_const_int_vec(gallivm, type,  16);
 223    c128 = lp_build_const_int_vec(gallivm, type, 128);
 224    c255 = lp_build_const_int_vec(gallivm, type, 255);
 225
 226    cy  = lp_build_const_int_vec(gallivm, type,  298);
 227    cug = lp_build_const_int_vec(gallivm, type, -100);
 228    cub = lp_build_const_int_vec(gallivm, type,  516);
 229    cvr = lp_build_const_int_vec(gallivm, type,  409);
 230    cvg = lp_build_const_int_vec(gallivm, type, -208);
 231
 232    /*
 233     *  y -= 16;
 234     *  u -= 128;
 235     *  v -= 128;
 236     */
 237
 238    y = LLVMBuildSub(builder, y, c16, "");
 239    u = LLVMBuildSub(builder, u, c128, "");
 240    v = LLVMBuildSub(builder, v, c128, "");
 241
 242    /*
 243     * r = 298 * _y            + 409 * _v + 128;
 244     * g = 298 * _y - 100 * _u - 208 * _v + 128;
 245     * b = 298 * _y + 516 * _u            + 128;
 246     */
 247
 248    y = LLVMBuildMul(builder, y, cy, "");
 249    y = LLVMBuildAdd(builder, y, c128, "");
 250
 251    *r = LLVMBuildMul(builder, v, cvr, "");
 252    *g = LLVMBuildAdd(builder,
 253                      LLVMBuildMul(builder, u, cug, ""),
 254                      LLVMBuildMul(builder, v, cvg, ""),
 255                      "");
 256    *b = LLVMBuildMul(builder, u, cub, "");
 257
 258    *r = LLVMBuildAdd(builder, *r, y, "");
 259    *g = LLVMBuildAdd(builder, *g, y, "");
 260    *b = LLVMBuildAdd(builder, *b, y, "");
 261
 262    /*
 263     * r >>= 8;
 264     * g >>= 8;
 265     * b >>= 8;
 266     */
 267
 268    *r = LLVMBuildAShr(builder, *r, c8, "r");
 269    *g = LLVMBuildAShr(builder, *g, c8, "g");
 270    *b = LLVMBuildAShr(builder, *b, c8, "b");
 271
 272    /*
 273     * Clamp
 274     */
 275
 276    *r = lp_build_clamp(&bld, *r, c0, c255);
 277    *g = lp_build_clamp(&bld, *g, c0, c255);
 278    *b = lp_build_clamp(&bld, *b, c0, c255);
 279 }
 280
 281
 282 static LLVMValueRef
 283 rgb_to_rgba_aos(struct gallivm_state *gallivm,
 284                 unsigned n,
 285                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
 286 {
 287    LLVMBuilderRef builder = gallivm->builder;
 288    struct lp_type type;
 289    LLVMValueRef a;
 290    LLVMValueRef rgba;
 291
 292    memset(&type, 0, sizeof type);
 293    type.sign = TRUE;
 294    type.width = 32;
 295    type.length = n;
 296
 297    assert(lp_check_value(type, r));
 298    assert(lp_check_value(type, g));
 299    assert(lp_check_value(type, b));
 300
 301    /*
 302     * Make a 4 x unorm8 vector
 303     */
 304
 305    r = r;
 306    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
 307    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
 308    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
 309
 310    rgba = r;
 311    rgba = LLVMBuildOr(builder, rgba, g, "");
 312    rgba = LLVMBuildOr(builder, rgba, b, "");
 313    rgba = LLVMBuildOr(builder, rgba, a, "");
 314
 315    rgba = LLVMBuildBitCast(builder, rgba,
 316                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
 317
 318    return rgba;
 319 }
 320
 321
 322 /**
 323  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
 324  */
 325 static LLVMValueRef
 326 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
 327                  unsigned n,
 328                  LLVMValueRef packed,
 329                  LLVMValueRef i)
 330 {
 331    LLVMValueRef y, u, v;
 332    LLVMValueRef r, g, b;
 333    LLVMValueRef rgba;
 334
 335    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
 336    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
 337    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 338
 339    return rgba;
 340 }
 341
 342
 343 /**
 344  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
 345  */
 346 static LLVMValueRef
 347 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
 348                  unsigned n,
 349                  LLVMValueRef packed,
 350                  LLVMValueRef i)
 351 {
 352    LLVMValueRef y, u, v;
 353    LLVMValueRef r, g, b;
 354    LLVMValueRef rgba;
 355
 356    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
 357    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
 358    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 359
 360    return rgba;
 361 }
 362
 363
 364 /**
 365  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
 366  */
 367 static LLVMValueRef
 368 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
 369                  unsigned n,
 370                  LLVMValueRef packed,
 371                  LLVMValueRef i)
 372 {
 373    LLVMValueRef r, g, b;
 374    LLVMValueRef rgba;
 375
 376    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
 377    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 378
 379    return rgba;
 380 }
 381
 382
 383 /**
 384  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
 385  */
 386 static LLVMValueRef
 387 grgb_to_rgba_aos(struct gallivm_state *gallivm,
 388                  unsigned n,
 389                  LLVMValueRef packed,
 390                  LLVMValueRef i)
 391 {
 392    LLVMValueRef r, g, b;
 393    LLVMValueRef rgba;
 394
 395    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
 396    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
 397
 398    return rgba;
 399 }
 400
 401
 402 /**
 403  * @param n  is the number of pixels processed
 404  * @param packed  is a <n x i32> vector with the packed YUYV blocks
 405  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 406  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
 407  */
 408 LLVMValueRef
 409 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
 410                                    const struct util_format_description *format_desc,
 411                                    unsigned n,
 412                                    LLVMValueRef base_ptr,
 413                                    LLVMValueRef offset,
 414                                    LLVMValueRef i,
 415                                    LLVMValueRef j)
 416 {
 417    LLVMValueRef packed;
 418    LLVMValueRef rgba;
 419
 420    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
 421    assert(format_desc->block.bits == 32);
 422    assert(format_desc->block.width == 2);
 423    assert(format_desc->block.height == 1);
 424
 425    packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
 426
 427    (void)j;
 428
 429    switch (format_desc->format) {
 430    case PIPE_FORMAT_UYVY:
 431       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
 432       break;
 433    case PIPE_FORMAT_YUYV:
 434       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
 435       break;
 436    case PIPE_FORMAT_R8G8_B8G8_UNORM:
 437       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
 438       break;
 439    case PIPE_FORMAT_G8R8_G8B8_UNORM:
 440       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
 441       break;
 442    default:
 443       assert(0);
 444       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
 445       break;
 446    }
 447
 448    return rgba;
 449 }
 450