src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_string.h"
  40
  41 #include "lp_bld_arit.h"
  42 #include "lp_bld_init.h"
  43 #include "lp_bld_type.h"
  44 #include "lp_bld_flow.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_swizzle.h"
  48 #include "lp_bld_format.h"
  49
  50
  51 /**
  52  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  53  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  54  * too.
  55  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  56  */
  57 LLVMValueRef
  58 lp_build_format_swizzle_aos(const struct util_format_description *desc,
  59                             struct lp_build_context *bld,
  60                             LLVMValueRef unswizzled)
  61 {
  62    unsigned char swizzles[4];
  63    unsigned chan;
  64
  65    assert(bld->type.length % 4 == 0);
  66
  67    for (chan = 0; chan < 4; ++chan) {
  68       enum util_format_swizzle swizzle;
  69
  70       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  71          /*
  72           * For ZS formats do RGBA = ZZZ1
  73           */
  74          if (chan == 3) {
  75             swizzle = UTIL_FORMAT_SWIZZLE_1;
  76          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  77             swizzle = UTIL_FORMAT_SWIZZLE_0;
  78          } else {
  79             swizzle = desc->swizzle[0];
  80          }
  81       } else {
  82          swizzle = desc->swizzle[chan];
  83       }
  84       swizzles[chan] = swizzle;
  85    }
  86
  87    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  88 }
  89
  90
  91 /**
  92  * Whether the format matches the vector type, apart of swizzles.
  93  */
  94 static INLINE boolean
  95 format_matches_type(const struct util_format_description *desc,
  96                     struct lp_type type)
  97 {
  98    enum util_format_type chan_type;
  99    unsigned chan;
 100
 101    assert(type.length % 4 == 0);
 102
 103    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
 104        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) {
 105       return FALSE;
 106    }
 107
 108    if (type.floating) {
 109       chan_type = UTIL_FORMAT_TYPE_FLOAT;
 110    } else if (type.fixed) {
 111       chan_type = UTIL_FORMAT_TYPE_FIXED;
 112    } else if (type.sign) {
 113       chan_type = UTIL_FORMAT_TYPE_SIGNED;
 114    } else {
 115       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
 116    }
 117
 118    for (chan = 0; chan < desc->nr_channels; ++chan) {
 119       if (desc->channel[chan].size != type.width) {
 120          return FALSE;
 121       }
 122
 123       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
 124          if (desc->channel[chan].type != chan_type ||
 125              desc->channel[chan].normalized != type.norm) {
 126             return FALSE;
 127          }
 128       }
 129    }
 130
 131    return TRUE;
 132 }
 133
 134
 135 /**
 136  * Unpack a single pixel into its RGBA components.
 137  *
 138  * @param desc  the pixel format for the packed pixel value
 139  * @param type  the desired return type (float[4] vs. ubyte[4])
 140  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 141  *
 142  * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 143  */
 144 static INLINE LLVMValueRef
 145 lp_build_unpack_rgba_aos(const struct util_format_description *desc,
 146                          struct lp_build_context *bld,
 147                          LLVMValueRef packed)
 148 {
 149    LLVMBuilderRef builder = bld->builder;
 150    struct lp_type type = bld->type;
 151    LLVMValueRef shifted, casted, scaled, masked;
 152    LLVMValueRef shifts[4];
 153    LLVMValueRef masks[4];
 154    LLVMValueRef scales[4];
 155
 156    boolean normalized;
 157    boolean needs_uitofp;
 158    unsigned shift;
 159    unsigned i;
 160
 161    /* TODO: Support more formats */
 162    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 163    assert(desc->block.width == 1);
 164    assert(desc->block.height == 1);
 165    assert(desc->block.bits <= 32);
 166
 167    /* Do the intermediate integer computations with 32bit integers since it
 168     * matches floating point size */
 169    if (desc->block.bits < 32)
 170       packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
 171
 172    /* Broadcast the packed value to all four channels
 173     * before: packed = BGRA
 174     * after: packed = {BGRA, BGRA, BGRA, BGRA}
 175     */
 176    packed = LLVMBuildInsertElement(builder,
 177                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
 178                                    packed,
 179                                    LLVMConstNull(LLVMInt32Type()),
 180                                    "");
 181    packed = LLVMBuildShuffleVector(builder,
 182                                    packed,
 183                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
 184                                    LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
 185                                    "");
 186
 187    /* Initialize vector constants */
 188    normalized = FALSE;
 189    needs_uitofp = FALSE;
 190    shift = 0;
 191
 192    /* Loop over 4 color components */
 193    for (i = 0; i < 4; ++i) {
 194       unsigned bits = desc->channel[i].size;
 195
 196       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 197          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 198          masks[i] = LLVMConstNull(LLVMInt32Type());
 199          scales[i] =  LLVMConstNull(LLVMFloatType());
 200       }
 201       else {
 202          unsigned long long mask = (1ULL << bits) - 1;
 203
 204          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 205
 206          if (bits == 32) {
 207             needs_uitofp = TRUE;
 208          }
 209
 210          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 211          masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
 212
 213          if (desc->channel[i].normalized) {
 214             scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
 215             normalized = TRUE;
 216          }
 217          else
 218             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 219       }
 220
 221       shift += bits;
 222    }
 223
 224    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
 225     * into masked = {B, G, R, A}
 226     */
 227    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 228    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 229
 230
 231    if (!needs_uitofp) {
 232       /* UIToFP can't be expressed in SSE2 */
 233       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 234    } else {
 235       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 236    }
 237
 238    /* At this point 'casted' may be a vector of floats such as
 239     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
 240     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
 241     */
 242
 243    if (normalized)
 244       scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
 245    else
 246       scaled = casted;
 247
 248    /*
 249     * Type conversion.
 250     *
 251     * TODO: We could avoid floating conversion for integer to
 252     * integer conversions.
 253     */
 254
 255    lp_build_conv(builder,
 256                  lp_float32_vec4_type(),
 257                  type,
 258                  &scaled, 1, &scaled, 1);
 259
 260    scaled = lp_build_format_swizzle_aos(desc, bld, scaled);
 261
 262    return scaled;
 263 }
 264
 265
 266 /**
 267  * Pack a single pixel.
 268  *
 269  * @param rgba 4 float vector with the unpacked components.
 270  *
 271  * XXX: This is mostly for reference and testing -- operating a single pixel at
 272  * a time is rarely if ever needed.
 273  */
 274 LLVMValueRef
 275 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 276                        const struct util_format_description *desc,
 277                        LLVMValueRef rgba)
 278 {
 279    LLVMTypeRef type;
 280    LLVMValueRef packed = NULL;
 281    LLVMValueRef swizzles[4];
 282    LLVMValueRef shifted, casted, scaled, unswizzled;
 283    LLVMValueRef shifts[4];
 284    LLVMValueRef scales[4];
 285    boolean normalized;
 286    unsigned shift;
 287    unsigned i, j;
 288
 289    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 290    assert(desc->block.width == 1);
 291    assert(desc->block.height == 1);
 292
 293    type = LLVMIntType(desc->block.bits);
 294
 295    /* Unswizzle the color components into the source vector. */
 296    for (i = 0; i < 4; ++i) {
 297       for (j = 0; j < 4; ++j) {
 298          if (desc->swizzle[j] == i)
 299             break;
 300       }
 301       if (j < 4)
 302          swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
 303       else
 304          swizzles[i] = LLVMGetUndef(LLVMInt32Type());
 305    }
 306
 307    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 308                                        LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
 309                                        LLVMConstVector(swizzles, 4), "");
 310
 311    normalized = FALSE;
 312    shift = 0;
 313    for (i = 0; i < 4; ++i) {
 314       unsigned bits = desc->channel[i].size;
 315
 316       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 317          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 318          scales[i] =  LLVMGetUndef(LLVMFloatType());
 319       }
 320       else {
 321          unsigned mask = (1 << bits) - 1;
 322
 323          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 324          assert(bits < 32);
 325
 326          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 327
 328          if (desc->channel[i].normalized) {
 329             scales[i] = LLVMConstReal(LLVMFloatType(), mask);
 330             normalized = TRUE;
 331          }
 332          else
 333             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 334       }
 335
 336       shift += bits;
 337    }
 338
 339    if (normalized)
 340       scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 341    else
 342       scaled = unswizzled;
 343
 344    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
 345
 346    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 347
 348    /* Bitwise or all components */
 349    for (i = 0; i < 4; ++i) {
 350       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 351          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
 352          if (packed)
 353             packed = LLVMBuildOr(builder, packed, component, "");
 354          else
 355             packed = component;
 356       }
 357    }
 358
 359    if (!packed)
 360       packed = LLVMGetUndef(LLVMInt32Type());
 361
 362    if (desc->block.bits < 32)
 363       packed = LLVMBuildTrunc(builder, packed, type, "");
 364
 365    return packed;
 366 }
 367
 368
 369 /**
 370  * Fetch a pixel into a 4 float AoS.
 371  *
 372  * \param format_desc  describes format of the image we're fetching from
 373  * \param ptr  address of the pixel block (or the texel if uncompressed)
 374  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 375  *              these will always be (0, 0).
 376  * \return  a 4 element vector with the pixel's RGBA values.
 377  */
 378 LLVMValueRef
 379 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 380                         const struct util_format_description *format_desc,
 381                         struct lp_type type,
 382                         LLVMValueRef ptr,
 383                         LLVMValueRef i,
 384                         LLVMValueRef j)
 385 {
 386    struct lp_build_context bld;
 387
 388    /* XXX: For now we only support one pixel at a time */
 389    assert(type.length == 4);
 390
 391    lp_build_context_init(&bld, builder, type);
 392
 393    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 394        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 395         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 396        format_desc->block.width == 1 &&
 397        format_desc->block.height == 1 &&
 398        util_is_pot(format_desc->block.bits) &&
 399        format_desc->block.bits <= 32 &&
 400        format_desc->is_bitmask &&
 401        !format_desc->is_mixed &&
 402        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 403         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
 404    {
 405       LLVMValueRef packed;
 406
 407       ptr = LLVMBuildBitCast(builder, ptr,
 408                              LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
 409                              "");
 410
 411       packed = LLVMBuildLoad(builder, ptr, "packed");
 412
 413       if (format_matches_type(format_desc, type)) {
 414          /*
 415           * The format matches the type (apart of a swizzle) so no need for
 416           * scaling or converting.
 417           */
 418
 419          assert(format_desc->block.bits <= type.width * type.length);
 420          if (format_desc->block.bits < type.width * type.length) {
 421             packed = LLVMBuildZExt(builder, packed,
 422                                    LLVMIntType(type.width * type.length), "");
 423          }
 424
 425          packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
 426
 427          return lp_build_format_swizzle_aos(format_desc, &bld, packed);
 428       } else {
 429          return lp_build_unpack_rgba_aos(format_desc, &bld, packed);
 430       }
 431    }
 432    else if (format_desc->fetch_rgba_float) {
 433       /*
 434        * Fallback to calling util_format_description::fetch_rgba_float.
 435        *
 436        * This is definitely not the most efficient way of fetching pixels, as
 437        * we miss the opportunity to do vectorization, but this it is a
 438        * convenient for formats or scenarios for which there was no opportunity
 439        * or incentive to optimize.
 440        */
 441
 442       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 443       char name[256];
 444       LLVMTypeRef f32t = LLVMFloatType();
 445       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
 446       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
 447       LLVMValueRef function;
 448       LLVMValueRef tmp_ptr;
 449       LLVMValueRef tmp_val;
 450       LLVMValueRef args[4];
 451
 452       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
 453                     format_desc->short_name);
 454
 455       /*
 456        * Declare and bind format_desc->fetch_rgba_float().
 457        */
 458
 459       function = LLVMGetNamedFunction(module, name);
 460       if (!function) {
 461          LLVMTypeRef ret_type;
 462          LLVMTypeRef arg_types[4];
 463          LLVMTypeRef function_type;
 464
 465          ret_type = LLVMVoidType();
 466          arg_types[0] = pf32t;
 467          arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
 468          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
 469          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 470          function = LLVMAddFunction(module, name, function_type);
 471
 472          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 473          LLVMSetLinkage(function, LLVMExternalLinkage);
 474
 475          assert(LLVMIsDeclaration(function));
 476
 477          LLVMAddGlobalMapping(lp_build_engine, function,
 478                               func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
 479       }
 480
 481       tmp_ptr = lp_build_alloca(builder, f32x4t, "");
 482
 483       /*
 484        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 485        * in the SoA vectors.
 486        */
 487
 488       args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
 489       args[1] = ptr;
 490       args[2] = i;
 491       args[3] = j;
 492
 493       LLVMBuildCall(builder, function, args, Elements(args), "");
 494
 495       tmp_val = LLVMBuildLoad(builder, tmp_ptr, "");
 496
 497       if (type.floating) {
 498          /* No further conversion necessary */
 499       } else {
 500          lp_build_conv(builder,
 501                        lp_float32_vec4_type(),
 502                        type,
 503                        &tmp_val, 1, &tmp_val, 1);
 504       }
 505
 506       return tmp_val;
 507    }
 508    else {
 509       assert(0);
 510       return lp_build_undef(type);
 511    }
 512 }