src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42 #include "lp_bld_arit.h"
  43
  44
  45 void
  46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  47                             struct lp_build_context *bld,
  48                             const LLVMValueRef *unswizzled,
  49                             LLVMValueRef swizzled_out[4])
  50 {
  51    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  52    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  53
  54    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  55       enum util_format_swizzle swizzle;
  56       LLVMValueRef depth_or_stencil;
  57
  58       if (util_format_has_stencil(format_desc) &&
  59           !util_format_has_depth(format_desc)) {
  60          assert(!bld->type.floating);
  61          swizzle = format_desc->swizzle[1];
  62       }
  63       else {
  64          assert(bld->type.floating);
  65          swizzle = format_desc->swizzle[0];
  66       }
  67       /*
  68        * Return zzz1 or sss1 for depth-stencil formats here.
  69        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  70        */
  71       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  72
  73       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  74       swizzled_out[3] = bld->one;
  75    }
  76    else {
  77       unsigned chan;
  78       for (chan = 0; chan < 4; ++chan) {
  79          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  80          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  81       }
  82    }
  83 }
  84
  85
  86 /**
  87  * Unpack several pixels in SoA.
  88  *
  89  * It takes a vector of packed pixels:
  90  *
  91  *   packed = {P0, P1, P2, P3, ..., Pn}
  92  *
  93  * And will produce four vectors:
  94  *
  95  *   red    = {R0, R1, R2, R3, ..., Rn}
  96  *   green  = {G0, G1, G2, G3, ..., Gn}
  97  *   blue   = {B0, B1, B2, B3, ..., Bn}
  98  *   alpha  = {A0, A1, A2, A3, ..., An}
  99  *
 100  * It requires that a packed pixel fits into an element of the output
 101  * channels. The common case is when converting pixel with a depth of 32 bit or
 102  * less into floats.
 103  *
 104  * \param format_desc  the format of the 'packed' incoming pixel vector
 105  * \param type  the desired type for rgba_out (type.length = n, above)
 106  * \param packed  the incoming vector of packed pixels
 107  * \param rgba_out  returns the SoA R,G,B,A vectors
 108  */
 109 void
 110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 111                          const struct util_format_description *format_desc,
 112                          struct lp_type type,
 113                          LLVMValueRef packed,
 114                          LLVMValueRef rgba_out[4])
 115 {
 116    LLVMBuilderRef builder = gallivm->builder;
 117    struct lp_build_context bld;
 118    LLVMValueRef inputs[4];
 119    unsigned chan;
 120
 121    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 122    assert(format_desc->block.width == 1);
 123    assert(format_desc->block.height == 1);
 124    assert(format_desc->block.bits <= type.width);
 125    /* FIXME: Support more output types */
 126    assert(type.width == 32);
 127
 128    lp_build_context_init(&bld, gallivm, type);
 129
 130    /* Decode the input vector components */
 131    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 132       const unsigned width = format_desc->channel[chan].size;
 133       const unsigned start = format_desc->channel[chan].shift;
 134       const unsigned stop = start + width;
 135       LLVMValueRef input;
 136
 137       input = packed;
 138
 139       switch(format_desc->channel[chan].type) {
 140       case UTIL_FORMAT_TYPE_VOID:
 141          input = lp_build_undef(gallivm, type);
 142          break;
 143
 144       case UTIL_FORMAT_TYPE_UNSIGNED:
 145          /*
 146           * Align the LSB
 147           */
 148
 149          if (start) {
 150             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 151          }
 152
 153          /*
 154           * Zero the MSBs
 155           */
 156
 157          if (stop < format_desc->block.bits) {
 158             unsigned mask = ((unsigned long long)1 << width) - 1;
 159             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 160          }
 161
 162          /*
 163           * Type conversion
 164           */
 165
 166          if (type.floating) {
 167             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
 168                if (format_desc->swizzle[3] == chan) {
 169                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 170                }
 171                else {
 172                   struct lp_type conv_type = lp_uint_type(type);
 173                   input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
 174                }
 175             }
 176             else {
 177                if(format_desc->channel[chan].normalized)
 178                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 179                else
 180                   input = LLVMBuildSIToFP(builder, input,
 181                                           lp_build_vec_type(gallivm, type), "");
 182             }
 183          }
 184          else if (format_desc->channel[chan].pure_integer) {
 185             /* Nothing to do */
 186          } else {
 187              /* FIXME */
 188              assert(0);
 189          }
 190
 191          break;
 192
 193       case UTIL_FORMAT_TYPE_SIGNED:
 194          /*
 195           * Align the sign bit first.
 196           */
 197
 198          if (stop < type.width) {
 199             unsigned bits = type.width - stop;
 200             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 201             input = LLVMBuildShl(builder, input, bits_val, "");
 202          }
 203
 204          /*
 205           * Align the LSB (with an arithmetic shift to preserve the sign)
 206           */
 207
 208          if (format_desc->channel[chan].size < type.width) {
 209             unsigned bits = type.width - format_desc->channel[chan].size;
 210             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 211             input = LLVMBuildAShr(builder, input, bits_val, "");
 212          }
 213
 214          /*
 215           * Type conversion
 216           */
 217
 218          if (type.floating) {
 219             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 220             if (format_desc->channel[chan].normalized) {
 221                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 222                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 223                input = LLVMBuildFMul(builder, input, scale_val, "");
 224                /* the formula above will produce value below -1.0 for most negative
 225                 * value but everything seems happy with that hence disable for now */
 226                if (0)
 227                   input = lp_build_max(&bld, input,
 228                                        lp_build_const_vec(gallivm, type, -1.0f));
 229             }
 230          }
 231          else if (format_desc->channel[chan].pure_integer) {
 232             /* Nothing to do */
 233          } else {
 234              /* FIXME */
 235              assert(0);
 236          }
 237
 238          break;
 239
 240       case UTIL_FORMAT_TYPE_FLOAT:
 241          if (type.floating) {
 242             assert(start == 0);
 243             assert(stop == 32);
 244             assert(type.width == 32);
 245             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 246          }
 247          else {
 248             /* FIXME */
 249             assert(0);
 250             input = lp_build_undef(gallivm, type);
 251          }
 252          break;
 253
 254       case UTIL_FORMAT_TYPE_FIXED:
 255          if (type.floating) {
 256             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 257             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 258             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 259             input = LLVMBuildFMul(builder, input, scale_val, "");
 260          }
 261          else {
 262             /* FIXME */
 263             assert(0);
 264             input = lp_build_undef(gallivm, type);
 265          }
 266          break;
 267
 268       default:
 269          assert(0);
 270          input = lp_build_undef(gallivm, type);
 271          break;
 272       }
 273
 274       inputs[chan] = input;
 275    }
 276
 277    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 278 }
 279
 280
 281 /**
 282  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 283  *
 284  * \param dst_type  The desired return type. For pure integer formats
 285  *                  this should be a 32bit wide int or uint vector type,
 286  *                  otherwise a float vector type.
 287  *
 288  * \param packed    The rgba8 values to pack.
 289  *
 290  * \param rgba      The 4 SoA return vectors.
 291  */
 292 void
 293 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 294                            struct lp_type dst_type,
 295                            LLVMValueRef packed,
 296                            LLVMValueRef *rgba)
 297 {
 298    LLVMBuilderRef builder = gallivm->builder;
 299    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 300    unsigned chan;
 301
 302    /* XXX technically shouldn't use that for uint dst_type */
 303    packed = LLVMBuildBitCast(builder, packed,
 304                              lp_build_int_vec_type(gallivm, dst_type), "");
 305
 306    /* Decode the input vector components */
 307    for (chan = 0; chan < 4; ++chan) {
 308 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 309       unsigned start = chan*8;
 310 #else
 311       unsigned start = (3-chan)*8;
 312 #endif
 313       unsigned stop = start + 8;
 314       LLVMValueRef input;
 315
 316       input = packed;
 317
 318       if (start)
 319          input = LLVMBuildLShr(builder, input,
 320                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 321
 322       if (stop < 32)
 323          input = LLVMBuildAnd(builder, input, mask, "");
 324
 325       if (dst_type.floating)
 326          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 327
 328       rgba[chan] = input;
 329    }
 330 }
 331
 332
 333
 334 /**
 335  * Fetch a texels from a texture, returning them in SoA layout.
 336  *
 337  * \param type  the desired return type for 'rgba'.  The vector length
 338  *              is the number of texels to fetch
 339  *
 340  * \param base_ptr  points to the base of the texture mip tree.
 341  * \param offset    offset to start of the texture image block.  For non-
 342  *                  compressed formats, this simply is an offset to the texel.
 343  *                  For compressed formats, it is an offset to the start of the
 344  *                  compressed data block.
 345  *
 346  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 347  *              these will always be (0,0).  For compressed formats, i will
 348  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 349  * \param cache  optional value pointing to a lp_build_format_cache structure
 350  */
 351 void
 352 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 353                         const struct util_format_description *format_desc,
 354                         struct lp_type type,
 355                         LLVMValueRef base_ptr,
 356                         LLVMValueRef offset,
 357                         LLVMValueRef i,
 358                         LLVMValueRef j,
 359                         LLVMValueRef cache,
 360                         LLVMValueRef rgba_out[4])
 361 {
 362    LLVMBuilderRef builder = gallivm->builder;
 363
 364    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 365        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 366         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
 367         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 368        format_desc->block.width == 1 &&
 369        format_desc->block.height == 1 &&
 370        format_desc->block.bits <= type.width &&
 371        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 372         format_desc->channel[0].size == 32))
 373    {
 374       /*
 375        * The packed pixel fits into an element of the destination format. Put
 376        * the packed pixels into a vector and extract each component for all
 377        * vector elements in parallel.
 378        */
 379
 380       LLVMValueRef packed;
 381
 382       /*
 383        * gather the texels from the texture
 384        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
 385        */
 386       assert(format_desc->block.bits <= type.width);
 387       packed = lp_build_gather(gallivm,
 388                                type.length,
 389                                format_desc->block.bits,
 390                                type.width,
 391                                TRUE,
 392                                base_ptr, offset, FALSE);
 393
 394       /*
 395        * convert texels to float rgba
 396        */
 397       lp_build_unpack_rgba_soa(gallivm,
 398                                format_desc,
 399                                type,
 400                                packed, rgba_out);
 401       return;
 402    }
 403
 404    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
 405        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 406       /*
 407        * similar conceptually to above but requiring special
 408        * AoS packed -> SoA float conversion code.
 409        */
 410       LLVMValueRef packed;
 411
 412       assert(type.floating);
 413       assert(type.width == 32);
 414
 415       packed = lp_build_gather(gallivm, type.length,
 416                                format_desc->block.bits,
 417                                type.width, TRUE,
 418                                base_ptr, offset, FALSE);
 419       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
 420          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
 421       }
 422       else {
 423          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
 424       }
 425       return;
 426    }
 427
 428    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
 429        format_desc->block.bits == 64) {
 430       /*
 431        * special case the format is 64 bits but we only require
 432        * 32bit (or 8bit) from each block.
 433        */
 434       LLVMValueRef packed;
 435
 436       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
 437          /*
 438           * for stencil simply fix up offsets - could in fact change
 439           * base_ptr instead even outside the shader.
 440           */
 441          unsigned mask = (1 << 8) - 1;
 442          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
 443          offset = LLVMBuildAdd(builder, offset, s_offset, "");
 444          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 445                                   TRUE, base_ptr, offset, FALSE);
 446          packed = LLVMBuildAnd(builder, packed,
 447                                lp_build_const_int_vec(gallivm, type, mask), "");
 448       }
 449       else {
 450          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
 451          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 452                                   TRUE, base_ptr, offset, TRUE);
 453          packed = LLVMBuildBitCast(builder, packed,
 454                                    lp_build_vec_type(gallivm, type), "");
 455       }
 456       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
 457       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
 458       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
 459       return;
 460    }
 461
 462    /*
 463     * Try calling lp_build_fetch_rgba_aos for all pixels.
 464     */
 465
 466    if (util_format_fits_8unorm(format_desc) &&
 467        type.floating && type.width == 32 &&
 468        (type.length == 1 || (type.length % 4 == 0))) {
 469       struct lp_type tmp_type;
 470       LLVMValueRef tmp;
 471
 472       memset(&tmp_type, 0, sizeof tmp_type);
 473       tmp_type.width = 8;
 474       tmp_type.length = type.length * 4;
 475       tmp_type.norm = TRUE;
 476
 477       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 478                                     TRUE, base_ptr, offset, i, j, cache);
 479
 480       lp_build_rgba8_to_fi32_soa(gallivm,
 481                                 type,
 482                                 tmp,
 483                                 rgba_out);
 484
 485       return;
 486    }
 487
 488    if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
 489        /* non-srgb case is already handled above */
 490        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
 491        type.floating && type.width == 32 &&
 492        (type.length == 1 || (type.length % 4 == 0)) &&
 493        cache) {
 494       const struct util_format_description *format_decompressed;
 495       const struct util_format_description *flinear_desc;
 496       LLVMValueRef packed;
 497       flinear_desc = util_format_description(util_format_linear(format_desc->format));
 498       packed = lp_build_fetch_cached_texels(gallivm,
 499                                             flinear_desc,
 500                                             type.length,
 501                                             base_ptr,
 502                                             offset,
 503                                             i, j,
 504                                             cache);
 505       packed = LLVMBuildBitCast(builder, packed,
 506                                 lp_build_int_vec_type(gallivm, type), "");
 507       /*
 508        * The values are now packed so they match ordinary srgb RGBA8 format,
 509        * hence need to use matching format for unpack.
 510        */
 511       format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
 512
 513       lp_build_unpack_rgba_soa(gallivm,
 514                                format_decompressed,
 515                                type,
 516                                packed, rgba_out);
 517
 518       return;
 519    }
 520
 521    /*
 522     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 523     *
 524     * This is not the most efficient way of fetching pixels, as we
 525     * miss some opportunities to do vectorization, but this is
 526     * convenient for formats or scenarios for which there was no
 527     * opportunity or incentive to optimize.
 528     */
 529
 530    {
 531       unsigned k, chan;
 532       struct lp_type tmp_type;
 533
 534       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 535          debug_printf("%s: scalar unpacking of %s\n",
 536                       __FUNCTION__, format_desc->short_name);
 537       }
 538
 539       tmp_type = type;
 540       tmp_type.length = 4;
 541
 542       for (chan = 0; chan < 4; ++chan) {
 543          rgba_out[chan] = lp_build_undef(gallivm, type);
 544       }
 545
 546       /* loop over number of pixels */
 547       for(k = 0; k < type.length; ++k) {
 548          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 549          LLVMValueRef offset_elem;
 550          LLVMValueRef i_elem, j_elem;
 551          LLVMValueRef tmp;
 552
 553          offset_elem = LLVMBuildExtractElement(builder, offset,
 554                                                index, "");
 555
 556          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 557          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 558
 559          /* Get a single float[4]={R,G,B,A} pixel */
 560          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 561                                        TRUE, base_ptr, offset_elem,
 562                                        i_elem, j_elem, cache);
 563
 564          /*
 565           * Insert the AoS tmp value channels into the SoA result vectors at
 566           * position = 'index'.
 567           */
 568          for (chan = 0; chan < 4; ++chan) {
 569             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 570             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 571             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 572                                                     tmp_chan, index, "");
 573          }
 574       }
 575    }
 576 }