src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42 #include "lp_bld_arit.h"
  43
  44
  45 void
  46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  47                             struct lp_build_context *bld,
  48                             const LLVMValueRef *unswizzled,
  49                             LLVMValueRef swizzled_out[4])
  50 {
  51    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  52    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  53
  54    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  55       enum util_format_swizzle swizzle;
  56       LLVMValueRef depth_or_stencil;
  57
  58       if (util_format_has_stencil(format_desc) &&
  59           !util_format_has_depth(format_desc)) {
  60          assert(!bld->type.floating);
  61          swizzle = format_desc->swizzle[1];
  62       }
  63       else {
  64          assert(bld->type.floating);
  65          swizzle = format_desc->swizzle[0];
  66       }
  67       /*
  68        * Return zzz1 or sss1 for depth-stencil formats here.
  69        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  70        */
  71       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  72
  73       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  74       swizzled_out[3] = bld->one;
  75    }
  76    else {
  77       unsigned chan;
  78       for (chan = 0; chan < 4; ++chan) {
  79          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  80          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  81       }
  82    }
  83 }
  84
  85
  86 /**
  87  * Unpack several pixels in SoA.
  88  *
  89  * It takes a vector of packed pixels:
  90  *
  91  *   packed = {P0, P1, P2, P3, ..., Pn}
  92  *
  93  * And will produce four vectors:
  94  *
  95  *   red    = {R0, R1, R2, R3, ..., Rn}
  96  *   green  = {G0, G1, G2, G3, ..., Gn}
  97  *   blue   = {B0, B1, B2, B3, ..., Bn}
  98  *   alpha  = {A0, A1, A2, A3, ..., An}
  99  *
 100  * It requires that a packed pixel fits into an element of the output
 101  * channels. The common case is when converting pixel with a depth of 32 bit or
 102  * less into floats.
 103  *
 104  * \param format_desc  the format of the 'packed' incoming pixel vector
 105  * \param type  the desired type for rgba_out (type.length = n, above)
 106  * \param packed  the incoming vector of packed pixels
 107  * \param rgba_out  returns the SoA R,G,B,A vectors
 108  */
 109 void
 110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 111                          const struct util_format_description *format_desc,
 112                          struct lp_type type,
 113                          LLVMValueRef packed,
 114                          LLVMValueRef rgba_out[4])
 115 {
 116    LLVMBuilderRef builder = gallivm->builder;
 117    struct lp_build_context bld;
 118    LLVMValueRef inputs[4];
 119    unsigned chan;
 120
 121    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 122    assert(format_desc->block.width == 1);
 123    assert(format_desc->block.height == 1);
 124    assert(format_desc->block.bits <= type.width);
 125    /* FIXME: Support more output types */
 126    assert(type.width == 32);
 127
 128    lp_build_context_init(&bld, gallivm, type);
 129
 130    /* Decode the input vector components */
 131    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 132       const unsigned width = format_desc->channel[chan].size;
 133       const unsigned start = format_desc->channel[chan].shift;
 134       const unsigned stop = start + width;
 135       LLVMValueRef input;
 136
 137       input = packed;
 138
 139       switch(format_desc->channel[chan].type) {
 140       case UTIL_FORMAT_TYPE_VOID:
 141          input = lp_build_undef(gallivm, type);
 142          break;
 143
 144       case UTIL_FORMAT_TYPE_UNSIGNED:
 145          /*
 146           * Align the LSB
 147           */
 148
 149          if (start) {
 150             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 151          }
 152
 153          /*
 154           * Zero the MSBs
 155           */
 156
 157          if (stop < format_desc->block.bits) {
 158             unsigned mask = ((unsigned long long)1 << width) - 1;
 159             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 160          }
 161
 162          /*
 163           * Type conversion
 164           */
 165
 166          if (type.floating) {
 167             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
 168                if (format_desc->swizzle[3] == chan) {
 169                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 170                }
 171                else {
 172                   struct lp_type conv_type = lp_uint_type(type);
 173                   input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
 174                }
 175             }
 176             else {
 177                if(format_desc->channel[chan].normalized)
 178                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 179                else
 180                   input = LLVMBuildSIToFP(builder, input,
 181                                           lp_build_vec_type(gallivm, type), "");
 182             }
 183          }
 184          else if (format_desc->channel[chan].pure_integer) {
 185             /* Nothing to do */
 186          } else {
 187              /* FIXME */
 188              assert(0);
 189          }
 190
 191          break;
 192
 193       case UTIL_FORMAT_TYPE_SIGNED:
 194          /*
 195           * Align the sign bit first.
 196           */
 197
 198          if (stop < type.width) {
 199             unsigned bits = type.width - stop;
 200             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 201             input = LLVMBuildShl(builder, input, bits_val, "");
 202          }
 203
 204          /*
 205           * Align the LSB (with an arithmetic shift to preserve the sign)
 206           */
 207
 208          if (format_desc->channel[chan].size < type.width) {
 209             unsigned bits = type.width - format_desc->channel[chan].size;
 210             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 211             input = LLVMBuildAShr(builder, input, bits_val, "");
 212          }
 213
 214          /*
 215           * Type conversion
 216           */
 217
 218          if (type.floating) {
 219             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 220             if (format_desc->channel[chan].normalized) {
 221                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 222                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 223                input = LLVMBuildFMul(builder, input, scale_val, "");
 224                /* the formula above will produce value below -1.0 for most negative
 225                 * value but everything seems happy with that hence disable for now */
 226                if (0)
 227                   input = lp_build_max(&bld, input,
 228                                        lp_build_const_vec(gallivm, type, -1.0f));
 229             }
 230          }
 231          else if (format_desc->channel[chan].pure_integer) {
 232             /* Nothing to do */
 233          } else {
 234              /* FIXME */
 235              assert(0);
 236          }
 237
 238          break;
 239
 240       case UTIL_FORMAT_TYPE_FLOAT:
 241          if (type.floating) {
 242             assert(start == 0);
 243             assert(stop == 32);
 244             assert(type.width == 32);
 245             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 246          }
 247          else {
 248             /* FIXME */
 249             assert(0);
 250             input = lp_build_undef(gallivm, type);
 251          }
 252          break;
 253
 254       case UTIL_FORMAT_TYPE_FIXED:
 255          if (type.floating) {
 256             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 257             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 258             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 259             input = LLVMBuildFMul(builder, input, scale_val, "");
 260          }
 261          else {
 262             /* FIXME */
 263             assert(0);
 264             input = lp_build_undef(gallivm, type);
 265          }
 266          break;
 267
 268       default:
 269          assert(0);
 270          input = lp_build_undef(gallivm, type);
 271          break;
 272       }
 273
 274       inputs[chan] = input;
 275    }
 276
 277    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 278 }
 279
 280
 281 /**
 282  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 283  *
 284  * \param dst_type  The desired return type. For pure integer formats
 285  *                  this should be a 32bit wide int or uint vector type,
 286  *                  otherwise a float vector type.
 287  *
 288  * \param packed    The rgba8 values to pack.
 289  *
 290  * \param rgba      The 4 SoA return vectors.
 291  */
 292 void
 293 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 294                            struct lp_type dst_type,
 295                            LLVMValueRef packed,
 296                            LLVMValueRef *rgba)
 297 {
 298    LLVMBuilderRef builder = gallivm->builder;
 299    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 300    unsigned chan;
 301
 302    /* XXX technically shouldn't use that for uint dst_type */
 303    packed = LLVMBuildBitCast(builder, packed,
 304                              lp_build_int_vec_type(gallivm, dst_type), "");
 305
 306    /* Decode the input vector components */
 307    for (chan = 0; chan < 4; ++chan) {
 308 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 309       unsigned start = chan*8;
 310 #else
 311       unsigned start = (3-chan)*8;
 312 #endif
 313       unsigned stop = start + 8;
 314       LLVMValueRef input;
 315
 316       input = packed;
 317
 318       if (start)
 319          input = LLVMBuildLShr(builder, input,
 320                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 321
 322       if (stop < 32)
 323          input = LLVMBuildAnd(builder, input, mask, "");
 324
 325       if (dst_type.floating)
 326          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 327
 328       rgba[chan] = input;
 329    }
 330 }
 331
 332
 333
 334 /**
 335  * Fetch a texels from a texture, returning them in SoA layout.
 336  *
 337  * \param type  the desired return type for 'rgba'.  The vector length
 338  *              is the number of texels to fetch
 339  *
 340  * \param base_ptr  points to the base of the texture mip tree.
 341  * \param offset    offset to start of the texture image block.  For non-
 342  *                  compressed formats, this simply is an offset to the texel.
 343  *                  For compressed formats, it is an offset to the start of the
 344  *                  compressed data block.
 345  *
 346  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 347  *              these will always be (0,0).  For compressed formats, i will
 348  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 349  */
 350 void
 351 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 352                         const struct util_format_description *format_desc,
 353                         struct lp_type type,
 354                         LLVMValueRef base_ptr,
 355                         LLVMValueRef offset,
 356                         LLVMValueRef i,
 357                         LLVMValueRef j,
 358                         LLVMValueRef rgba_out[4])
 359 {
 360    LLVMBuilderRef builder = gallivm->builder;
 361
 362    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 363        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 364         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
 365         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 366        format_desc->block.width == 1 &&
 367        format_desc->block.height == 1 &&
 368        format_desc->block.bits <= type.width &&
 369        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 370         format_desc->channel[0].size == 32))
 371    {
 372       /*
 373        * The packed pixel fits into an element of the destination format. Put
 374        * the packed pixels into a vector and extract each component for all
 375        * vector elements in parallel.
 376        */
 377
 378       LLVMValueRef packed;
 379
 380       /*
 381        * gather the texels from the texture
 382        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
 383        */
 384       assert(format_desc->block.bits <= type.width);
 385       packed = lp_build_gather(gallivm,
 386                                type.length,
 387                                format_desc->block.bits,
 388                                type.width,
 389                                base_ptr, offset, FALSE);
 390
 391       /*
 392        * convert texels to float rgba
 393        */
 394       lp_build_unpack_rgba_soa(gallivm,
 395                                format_desc,
 396                                type,
 397                                packed, rgba_out);
 398       return;
 399    }
 400
 401    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
 402        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 403       /*
 404        * similar conceptually to above but requiring special
 405        * AoS packed -> SoA float conversion code.
 406        */
 407       LLVMValueRef packed;
 408
 409       assert(type.floating);
 410       assert(type.width == 32);
 411
 412       packed = lp_build_gather(gallivm, type.length,
 413                                format_desc->block.bits,
 414                                type.width, base_ptr, offset,
 415                                FALSE);
 416       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
 417          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
 418       }
 419       else {
 420          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
 421       }
 422       return;
 423    }
 424
 425    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
 426        format_desc->block.bits == 64) {
 427       /*
 428        * special case the format is 64 bits but we only require
 429        * 32bit (or 8bit) from each block.
 430        */
 431       LLVMValueRef packed;
 432
 433       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
 434          /*
 435           * for stencil simply fix up offsets - could in fact change
 436           * base_ptr instead even outside the shader.
 437           */
 438          unsigned mask = (1 << 8) - 1;
 439          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
 440          offset = LLVMBuildAdd(builder, offset, s_offset, "");
 441          packed = lp_build_gather(gallivm, type.length,
 442                                   32, type.width, base_ptr, offset, FALSE);
 443          packed = LLVMBuildAnd(builder, packed,
 444                                lp_build_const_int_vec(gallivm, type, mask), "");
 445       }
 446       else {
 447          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
 448          packed = lp_build_gather(gallivm, type.length,
 449                                   32, type.width, base_ptr, offset, TRUE);
 450          packed = LLVMBuildBitCast(builder, packed,
 451                                    lp_build_vec_type(gallivm, type), "");
 452       }
 453       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
 454       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
 455       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
 456       return;
 457    }
 458
 459    /*
 460     * Try calling lp_build_fetch_rgba_aos for all pixels.
 461     */
 462
 463    if (util_format_fits_8unorm(format_desc) &&
 464        type.floating && type.width == 32 &&
 465        (type.length == 1 || (type.length % 4 == 0))) {
 466       struct lp_type tmp_type;
 467       LLVMValueRef tmp;
 468
 469       memset(&tmp_type, 0, sizeof tmp_type);
 470       tmp_type.width = 8;
 471       tmp_type.length = type.length * 4;
 472       tmp_type.norm = TRUE;
 473
 474       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 475                                     base_ptr, offset, i, j);
 476
 477       lp_build_rgba8_to_fi32_soa(gallivm,
 478                                 type,
 479                                 tmp,
 480                                 rgba_out);
 481
 482       return;
 483    }
 484
 485    /*
 486     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 487     *
 488     * This is not the most efficient way of fetching pixels, as we
 489     * miss some opportunities to do vectorization, but this is
 490     * convenient for formats or scenarios for which there was no
 491     * opportunity or incentive to optimize.
 492     */
 493
 494    {
 495       unsigned k, chan;
 496       struct lp_type tmp_type;
 497
 498       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 499          debug_printf("%s: scalar unpacking of %s\n",
 500                       __FUNCTION__, format_desc->short_name);
 501       }
 502
 503       tmp_type = type;
 504       tmp_type.length = 4;
 505
 506       for (chan = 0; chan < 4; ++chan) {
 507          rgba_out[chan] = lp_build_undef(gallivm, type);
 508       }
 509
 510       /* loop over number of pixels */
 511       for(k = 0; k < type.length; ++k) {
 512          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 513          LLVMValueRef offset_elem;
 514          LLVMValueRef i_elem, j_elem;
 515          LLVMValueRef tmp;
 516
 517          offset_elem = LLVMBuildExtractElement(builder, offset,
 518                                                index, "");
 519
 520          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 521          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 522
 523          /* Get a single float[4]={R,G,B,A} pixel */
 524          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 525                                        base_ptr, offset_elem,
 526                                        i_elem, j_elem);
 527
 528          /*
 529           * Insert the AoS tmp value channels into the SoA result vectors at
 530           * position = 'index'.
 531           */
 532          for (chan = 0; chan < 4; ++chan) {
 533             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 534             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 535             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 536                                                     tmp_chan, index, "");
 537          }
 538       }
 539    }
 540 }