src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_string.h"
  40
  41 #include "lp_bld_init.h"
  42 #include "lp_bld_type.h"
  43 #include "lp_bld_flow.h"
  44 #include "lp_bld_format.h"
  45
  46
  47 /**
  48  * Unpack a single pixel into its RGBA components.
  49  *
  50  * @param desc  the pixel format for the packed pixel value
  51  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
  52  *
  53  * @return RGBA in a 4 floats vector.
  54  */
  55 LLVMValueRef
  56 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
  57                          const struct util_format_description *desc,
  58                          LLVMValueRef packed)
  59 {
  60    LLVMValueRef shifted, casted, scaled, masked;
  61    LLVMValueRef shifts[4];
  62    LLVMValueRef masks[4];
  63    LLVMValueRef scales[4];
  64    LLVMValueRef swizzles[4];
  65    LLVMValueRef aux[4];
  66    boolean normalized;
  67    int empty_channel;
  68    boolean needs_uitofp;
  69    unsigned shift;
  70    unsigned i;
  71
  72    /* TODO: Support more formats */
  73    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  74    assert(desc->block.width == 1);
  75    assert(desc->block.height == 1);
  76    assert(desc->block.bits <= 32);
  77
  78    /* Do the intermediate integer computations with 32bit integers since it
  79     * matches floating point size */
  80    if (desc->block.bits < 32)
  81       packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
  82
  83    /* Broadcast the packed value to all four channels
  84     * before: packed = BGRA
  85     * after: packed = {BGRA, BGRA, BGRA, BGRA}
  86     */
  87    packed = LLVMBuildInsertElement(builder,
  88                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  89                                    packed,
  90                                    LLVMConstNull(LLVMInt32Type()),
  91                                    "");
  92    packed = LLVMBuildShuffleVector(builder,
  93                                    packed,
  94                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  95                                    LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
  96                                    "");
  97
  98    /* Initialize vector constants */
  99    normalized = FALSE;
 100    needs_uitofp = FALSE;
 101    empty_channel = -1;
 102    shift = 0;
 103
 104    /* Loop over 4 color components */
 105    for (i = 0; i < 4; ++i) {
 106       unsigned bits = desc->channel[i].size;
 107
 108       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 109          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 110          masks[i] = LLVMConstNull(LLVMInt32Type());
 111          scales[i] =  LLVMConstNull(LLVMFloatType());
 112          empty_channel = i;
 113       }
 114       else {
 115          unsigned long long mask = (1ULL << bits) - 1;
 116
 117          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 118
 119          if (bits == 32) {
 120             needs_uitofp = TRUE;
 121          }
 122
 123          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 124          masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
 125
 126          if (desc->channel[i].normalized) {
 127             scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
 128             normalized = TRUE;
 129          }
 130          else
 131             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 132       }
 133
 134       shift += bits;
 135    }
 136
 137    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
 138     * into masked = {B, G, R, A}
 139     */
 140    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 141    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 142
 143
 144    if (!needs_uitofp) {
 145       /* UIToFP can't be expressed in SSE2 */
 146       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 147    } else {
 148       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 149    }
 150
 151    /* At this point 'casted' may be a vector of floats such as
 152     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
 153     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
 154     */
 155
 156    if (normalized)
 157       scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
 158    else
 159       scaled = casted;
 160
 161    for (i = 0; i < 4; ++i)
 162       aux[i] = LLVMGetUndef(LLVMFloatType());
 163
 164    /* Build swizzles vector to put components into R,G,B,A order */
 165    for (i = 0; i < 4; ++i) {
 166       enum util_format_swizzle swizzle;
 167
 168       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
 169          /*
 170           * For ZS formats do RGBA = ZZZ1
 171           */
 172          if (i == 3) {
 173             swizzle = UTIL_FORMAT_SWIZZLE_1;
 174          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
 175             swizzle = UTIL_FORMAT_SWIZZLE_0;
 176          } else {
 177             swizzle = desc->swizzle[0];
 178          }
 179       } else {
 180          swizzle = desc->swizzle[i];
 181       }
 182
 183       switch (swizzle) {
 184       case UTIL_FORMAT_SWIZZLE_X:
 185       case UTIL_FORMAT_SWIZZLE_Y:
 186       case UTIL_FORMAT_SWIZZLE_Z:
 187       case UTIL_FORMAT_SWIZZLE_W:
 188          swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
 189          break;
 190       case UTIL_FORMAT_SWIZZLE_0:
 191          assert(empty_channel >= 0);
 192          swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
 193          break;
 194       case UTIL_FORMAT_SWIZZLE_1:
 195          swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
 196          aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
 197          break;
 198       case UTIL_FORMAT_SWIZZLE_NONE:
 199          swizzles[i] = LLVMGetUndef(LLVMFloatType());
 200          assert(0);
 201          break;
 202       }
 203    }
 204
 205    return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4),
 206                                  LLVMConstVector(swizzles, 4), "");
 207 }
 208
 209
 210 /**
 211  * Pack a single pixel.
 212  *
 213  * @param rgba 4 float vector with the unpacked components.
 214  *
 215  * XXX: This is mostly for reference and testing -- operating a single pixel at
 216  * a time is rarely if ever needed.
 217  */
 218 LLVMValueRef
 219 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 220                        const struct util_format_description *desc,
 221                        LLVMValueRef rgba)
 222 {
 223    LLVMTypeRef type;
 224    LLVMValueRef packed = NULL;
 225    LLVMValueRef swizzles[4];
 226    LLVMValueRef shifted, casted, scaled, unswizzled;
 227    LLVMValueRef shifts[4];
 228    LLVMValueRef scales[4];
 229    boolean normalized;
 230    unsigned shift;
 231    unsigned i, j;
 232
 233    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 234    assert(desc->block.width == 1);
 235    assert(desc->block.height == 1);
 236
 237    type = LLVMIntType(desc->block.bits);
 238
 239    /* Unswizzle the color components into the source vector. */
 240    for (i = 0; i < 4; ++i) {
 241       for (j = 0; j < 4; ++j) {
 242          if (desc->swizzle[j] == i)
 243             break;
 244       }
 245       if (j < 4)
 246          swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
 247       else
 248          swizzles[i] = LLVMGetUndef(LLVMInt32Type());
 249    }
 250
 251    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 252                                        LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
 253                                        LLVMConstVector(swizzles, 4), "");
 254
 255    normalized = FALSE;
 256    shift = 0;
 257    for (i = 0; i < 4; ++i) {
 258       unsigned bits = desc->channel[i].size;
 259
 260       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 261          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 262          scales[i] =  LLVMGetUndef(LLVMFloatType());
 263       }
 264       else {
 265          unsigned mask = (1 << bits) - 1;
 266
 267          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 268          assert(bits < 32);
 269
 270          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 271
 272          if (desc->channel[i].normalized) {
 273             scales[i] = LLVMConstReal(LLVMFloatType(), mask);
 274             normalized = TRUE;
 275          }
 276          else
 277             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 278       }
 279
 280       shift += bits;
 281    }
 282
 283    if (normalized)
 284       scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 285    else
 286       scaled = unswizzled;
 287
 288    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
 289
 290    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 291
 292    /* Bitwise or all components */
 293    for (i = 0; i < 4; ++i) {
 294       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 295          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
 296          if (packed)
 297             packed = LLVMBuildOr(builder, packed, component, "");
 298          else
 299             packed = component;
 300       }
 301    }
 302
 303    if (!packed)
 304       packed = LLVMGetUndef(LLVMInt32Type());
 305
 306    if (desc->block.bits < 32)
 307       packed = LLVMBuildTrunc(builder, packed, type, "");
 308
 309    return packed;
 310 }
 311
 312
 313 /**
 314  * Fetch a pixel into a 4 float AoS.
 315  *
 316  * \param format_desc  describes format of the image we're fetching from
 317  * \param ptr  address of the pixel block (or the texel if uncompressed)
 318  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 319  *              these will always be (0,).
 320  * \return  valueRef with the float[4] RGBA pixel
 321  */
 322 LLVMValueRef
 323 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 324                         const struct util_format_description *format_desc,
 325                         LLVMValueRef ptr,
 326                         LLVMValueRef i,
 327                         LLVMValueRef j)
 328 {
 329
 330    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 331        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 332         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 333        format_desc->block.width == 1 &&
 334        format_desc->block.height == 1 &&
 335        util_is_pot(format_desc->block.bits) &&
 336        format_desc->block.bits <= 32 &&
 337        format_desc->is_bitmask &&
 338        !format_desc->is_mixed &&
 339        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 340         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
 341    {
 342       LLVMValueRef packed;
 343
 344       ptr = LLVMBuildBitCast(builder, ptr,
 345                              LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
 346                              "");
 347
 348       packed = LLVMBuildLoad(builder, ptr, "packed");
 349
 350       return lp_build_unpack_rgba_aos(builder, format_desc, packed);
 351    }
 352    else if (format_desc->fetch_rgba_float) {
 353       /*
 354        * Fallback to calling util_format_description::fetch_rgba_float.
 355        *
 356        * This is definitely not the most efficient way of fetching pixels, as
 357        * we miss the opportunity to do vectorization, but this it is a
 358        * convenient for formats or scenarios for which there was no opportunity
 359        * or incentive to optimize.
 360        */
 361
 362       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 363       char name[256];
 364       LLVMValueRef function;
 365       LLVMValueRef tmp;
 366       LLVMValueRef args[4];
 367
 368       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
 369                     format_desc->short_name);
 370
 371       /*
 372        * Declare and bind format_desc->fetch_rgba_float().
 373        */
 374
 375       function = LLVMGetNamedFunction(module, name);
 376       if (!function) {
 377          LLVMTypeRef ret_type;
 378          LLVMTypeRef arg_types[4];
 379          LLVMTypeRef function_type;
 380
 381          ret_type = LLVMVoidType();
 382          arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
 383          arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
 384          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
 385          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 386          function = LLVMAddFunction(module, name, function_type);
 387
 388          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 389          LLVMSetLinkage(function, LLVMExternalLinkage);
 390
 391          assert(LLVMIsDeclaration(function));
 392
 393          LLVMAddGlobalMapping(lp_build_engine, function,
 394                               func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
 395       }
 396
 397       tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
 398
 399       /*
 400        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 401        * in the SoA vectors.
 402        */
 403
 404       args[0] = LLVMBuildBitCast(builder, tmp,
 405                                  LLVMPointerType(LLVMFloatType(), 0), "");
 406       args[1] = ptr;
 407       args[2] = i;
 408       args[3] = j;
 409
 410       LLVMBuildCall(builder, function, args, Elements(args), "");
 411
 412       return LLVMBuildLoad(builder, tmp, "");
 413    }
 414    else {
 415       assert(0);
 416       return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
 417    }
 418 }