src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_string.h"
  40
  41 #include "lp_bld_init.h"
  42 #include "lp_bld_type.h"
  43 #include "lp_bld_flow.h"
  44 #include "lp_bld_format.h"
  45
  46
  47 /**
  48  * Unpack a single pixel into its RGBA components.
  49  *
  50  * @param packed integer.
  51  *
  52  * @return RGBA in a 4 floats vector.
  53  */
  54 LLVMValueRef
  55 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
  56                          const struct util_format_description *desc,
  57                          LLVMValueRef packed)
  58 {
  59    LLVMValueRef shifted, casted, scaled, masked;
  60    LLVMValueRef shifts[4];
  61    LLVMValueRef masks[4];
  62    LLVMValueRef scales[4];
  63    LLVMValueRef swizzles[4];
  64    LLVMValueRef aux[4];
  65    bool normalized;
  66    int empty_channel;
  67    bool needs_uitofp;
  68    unsigned shift;
  69    unsigned i;
  70
  71    /* TODO: Support more formats */
  72    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  73    assert(desc->block.width == 1);
  74    assert(desc->block.height == 1);
  75    assert(desc->block.bits <= 32);
  76
  77    /* Do the intermediate integer computations with 32bit integers since it
  78     * matches floating point size */
  79    if (desc->block.bits < 32)
  80       packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
  81
  82    /* Broadcast the packed value to all four channels */
  83    packed = LLVMBuildInsertElement(builder,
  84                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  85                                    packed,
  86                                    LLVMConstNull(LLVMInt32Type()),
  87                                    "");
  88    packed = LLVMBuildShuffleVector(builder,
  89                                    packed,
  90                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
  91                                    LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
  92                                    "");
  93
  94    /* Initialize vector constants */
  95    normalized = FALSE;
  96    needs_uitofp = FALSE;
  97    empty_channel = -1;
  98    shift = 0;
  99    for (i = 0; i < 4; ++i) {
 100       unsigned bits = desc->channel[i].size;
 101
 102       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 103          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 104          masks[i] = LLVMConstNull(LLVMInt32Type());
 105          scales[i] =  LLVMConstNull(LLVMFloatType());
 106          empty_channel = i;
 107       }
 108       else {
 109          unsigned long long mask = (1ULL << bits) - 1;
 110
 111          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 112
 113          if (bits == 32) {
 114             needs_uitofp = TRUE;
 115          }
 116
 117          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 118          masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
 119
 120          if (desc->channel[i].normalized) {
 121             scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
 122             normalized = TRUE;
 123          }
 124          else
 125             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 126       }
 127
 128       shift += bits;
 129    }
 130
 131    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 132    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 133    if (!needs_uitofp) {
 134       /* UIToFP can't be expressed in SSE2 */
 135       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 136    } else {
 137       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 138    }
 139
 140    if (normalized)
 141       scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
 142    else
 143       scaled = casted;
 144
 145    for (i = 0; i < 4; ++i)
 146       aux[i] = LLVMGetUndef(LLVMFloatType());
 147
 148    for (i = 0; i < 4; ++i) {
 149       enum util_format_swizzle swizzle;
 150
 151       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
 152          /*
 153           * For ZS formats do RGBA = ZZZ1
 154           */
 155          if (i == 3) {
 156             swizzle = UTIL_FORMAT_SWIZZLE_1;
 157          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
 158             swizzle = UTIL_FORMAT_SWIZZLE_0;
 159          } else {
 160             swizzle = desc->swizzle[0];
 161          }
 162       } else {
 163          swizzle = desc->swizzle[i];
 164       }
 165
 166       switch (swizzle) {
 167       case UTIL_FORMAT_SWIZZLE_X:
 168       case UTIL_FORMAT_SWIZZLE_Y:
 169       case UTIL_FORMAT_SWIZZLE_Z:
 170       case UTIL_FORMAT_SWIZZLE_W:
 171          swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
 172          break;
 173       case UTIL_FORMAT_SWIZZLE_0:
 174          assert(empty_channel >= 0);
 175          swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
 176          break;
 177       case UTIL_FORMAT_SWIZZLE_1:
 178          swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
 179          aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
 180          break;
 181       case UTIL_FORMAT_SWIZZLE_NONE:
 182          swizzles[i] = LLVMGetUndef(LLVMFloatType());
 183          assert(0);
 184          break;
 185       }
 186    }
 187
 188    return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
 189 }
 190
 191
 192 /**
 193  * Pack a single pixel.
 194  *
 195  * @param rgba 4 float vector with the unpacked components.
 196  *
 197  * XXX: This is mostly for reference and testing -- operating a single pixel at
 198  * a time is rarely if ever needed.
 199  */
 200 LLVMValueRef
 201 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 202                        const struct util_format_description *desc,
 203                        LLVMValueRef rgba)
 204 {
 205    LLVMTypeRef type;
 206    LLVMValueRef packed = NULL;
 207    LLVMValueRef swizzles[4];
 208    LLVMValueRef shifted, casted, scaled, unswizzled;
 209    LLVMValueRef shifts[4];
 210    LLVMValueRef scales[4];
 211    bool normalized;
 212    unsigned shift;
 213    unsigned i, j;
 214
 215    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 216    assert(desc->block.width == 1);
 217    assert(desc->block.height == 1);
 218
 219    type = LLVMIntType(desc->block.bits);
 220
 221    /* Unswizzle the color components into the source vector. */
 222    for (i = 0; i < 4; ++i) {
 223       for (j = 0; j < 4; ++j) {
 224          if (desc->swizzle[j] == i)
 225             break;
 226       }
 227       if (j < 4)
 228          swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
 229       else
 230          swizzles[i] = LLVMGetUndef(LLVMInt32Type());
 231    }
 232
 233    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 234                                        LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
 235                                        LLVMConstVector(swizzles, 4), "");
 236
 237    normalized = FALSE;
 238    shift = 0;
 239    for (i = 0; i < 4; ++i) {
 240       unsigned bits = desc->channel[i].size;
 241
 242       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 243          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 244          scales[i] =  LLVMGetUndef(LLVMFloatType());
 245       }
 246       else {
 247          unsigned mask = (1 << bits) - 1;
 248
 249          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 250          assert(bits < 32);
 251
 252          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 253
 254          if (desc->channel[i].normalized) {
 255             scales[i] = LLVMConstReal(LLVMFloatType(), mask);
 256             normalized = TRUE;
 257          }
 258          else
 259             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 260       }
 261
 262       shift += bits;
 263    }
 264
 265    if (normalized)
 266       scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 267    else
 268       scaled = unswizzled;
 269
 270    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
 271
 272    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 273
 274    /* Bitwise or all components */
 275    for (i = 0; i < 4; ++i) {
 276       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 277          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
 278          if (packed)
 279             packed = LLVMBuildOr(builder, packed, component, "");
 280          else
 281             packed = component;
 282       }
 283    }
 284
 285    if (!packed)
 286       packed = LLVMGetUndef(LLVMInt32Type());
 287
 288    if (desc->block.bits < 32)
 289       packed = LLVMBuildTrunc(builder, packed, type, "");
 290
 291    return packed;
 292 }
 293
 294
 295 /**
 296  * Fetch a pixel into a 4 float AoS.
 297  *
 298  * i and j are the sub-block pixel coordinates.
 299  */
 300 LLVMValueRef
 301 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 302                         const struct util_format_description *format_desc,
 303                         LLVMValueRef ptr,
 304                         LLVMValueRef i,
 305                         LLVMValueRef j)
 306 {
 307
 308    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 309        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 310         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 311        format_desc->block.width == 1 &&
 312        format_desc->block.height == 1 &&
 313        util_is_pot(format_desc->block.bits) &&
 314        format_desc->block.bits <= 32 &&
 315        format_desc->is_bitmask &&
 316        !format_desc->is_mixed &&
 317        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 318         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
 319    {
 320       LLVMValueRef packed;
 321
 322       ptr = LLVMBuildBitCast(builder, ptr,
 323                              LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
 324                              "");
 325
 326       packed = LLVMBuildLoad(builder, ptr, "packed");
 327
 328       return lp_build_unpack_rgba_aos(builder, format_desc, packed);
 329    }
 330    else if (format_desc->fetch_rgba_float) {
 331       /*
 332        * Fallback to calling util_format_description::fetch_rgba_float.
 333        *
 334        * This is definitely not the most efficient way of fetching pixels, as
 335        * we miss the opportunity to do vectorization, but this it is a
 336        * convenient for formats or scenarios for which there was no opportunity
 337        * or incentive to optimize.
 338        */
 339
 340       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 341       char name[256];
 342       LLVMValueRef function;
 343       LLVMValueRef tmp;
 344       LLVMValueRef args[4];
 345
 346       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
 347                     format_desc->short_name);
 348
 349       /*
 350        * Declare and bind format_desc->fetch_rgba_float().
 351        */
 352
 353       function = LLVMGetNamedFunction(module, name);
 354       if (!function) {
 355          LLVMTypeRef ret_type;
 356          LLVMTypeRef arg_types[4];
 357          LLVMTypeRef function_type;
 358
 359          ret_type = LLVMVoidType();
 360          arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
 361          arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
 362          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
 363          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 364          function = LLVMAddFunction(module, name, function_type);
 365
 366          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 367          LLVMSetLinkage(function, LLVMExternalLinkage);
 368
 369          assert(LLVMIsDeclaration(function));
 370
 371          LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
 372       }
 373
 374       tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
 375
 376       /*
 377        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 378        * in the SoA vectors.
 379        */
 380
 381       args[0] = LLVMBuildBitCast(builder, tmp,
 382                                  LLVMPointerType(LLVMFloatType(), 0), "");
 383       args[1] = ptr;
 384       args[2] = i;
 385       args[3] = j;
 386
 387       LLVMBuildCall(builder, function, args, 4, "");
 388
 389       return LLVMBuildLoad(builder, tmp, "");
 390    }
 391    else {
 392       assert(0);
 393       return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
 394    }
 395 }