src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42 #include "lp_bld_arit.h"
  43 #include "lp_bld_pack.h"
  44
  45
  46 static void
  47 convert_to_soa(struct gallivm_state *gallivm,
  48                LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32],
  49                LLVMValueRef dst_soa[4],
  50                const struct lp_type soa_type)
  51 {
  52    unsigned j, k;
  53    struct lp_type aos_channel_type = soa_type;
  54
  55    LLVMValueRef aos_channels[4];
  56    unsigned pixels_per_channel = soa_type.length / 4;
  57
  58    debug_assert((soa_type.length % 4) == 0);
  59
  60    aos_channel_type.length >>= 1;
  61
  62    for (j = 0; j < 4; ++j) {
  63       LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
  64
  65       assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
  66
  67       for (k = 0; k < pixels_per_channel; ++k) {
  68          channel[k] = src_aos[j + 4 * k];
  69       }
  70
  71       aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
  72    }
  73
  74    lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa);
  75 }
  76
  77
  78 void
  79 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  80                             struct lp_build_context *bld,
  81                             const LLVMValueRef *unswizzled,
  82                             LLVMValueRef swizzled_out[4])
  83 {
  84    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  85       enum pipe_swizzle swizzle;
  86       LLVMValueRef depth_or_stencil;
  87
  88       if (util_format_has_stencil(format_desc) &&
  89           !util_format_has_depth(format_desc)) {
  90          assert(!bld->type.floating);
  91          swizzle = format_desc->swizzle[1];
  92       }
  93       else {
  94          assert(bld->type.floating);
  95          swizzle = format_desc->swizzle[0];
  96       }
  97       /*
  98        * Return zzz1 or sss1 for depth-stencil formats here.
  99        * Correct swizzling will be handled by apply_sampler_swizzle() later.
 100        */
 101       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
 102
 103       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
 104       swizzled_out[3] = bld->one;
 105    }
 106    else {
 107       unsigned chan;
 108       for (chan = 0; chan < 4; ++chan) {
 109          enum pipe_swizzle swizzle = format_desc->swizzle[chan];
 110          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
 111       }
 112    }
 113 }
 114
 115
 116 /**
 117  * Unpack several pixels in SoA.
 118  *
 119  * It takes a vector of packed pixels:
 120  *
 121  *   packed = {P0, P1, P2, P3, ..., Pn}
 122  *
 123  * And will produce four vectors:
 124  *
 125  *   red    = {R0, R1, R2, R3, ..., Rn}
 126  *   green  = {G0, G1, G2, G3, ..., Gn}
 127  *   blue   = {B0, B1, B2, B3, ..., Bn}
 128  *   alpha  = {A0, A1, A2, A3, ..., An}
 129  *
 130  * It requires that a packed pixel fits into an element of the output
 131  * channels. The common case is when converting pixel with a depth of 32 bit or
 132  * less into floats.
 133  *
 134  * \param format_desc  the format of the 'packed' incoming pixel vector
 135  * \param type  the desired type for rgba_out (type.length = n, above)
 136  * \param packed  the incoming vector of packed pixels
 137  * \param rgba_out  returns the SoA R,G,B,A vectors
 138  */
 139 void
 140 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 141                          const struct util_format_description *format_desc,
 142                          struct lp_type type,
 143                          LLVMValueRef packed,
 144                          LLVMValueRef rgba_out[4])
 145 {
 146    LLVMBuilderRef builder = gallivm->builder;
 147    struct lp_build_context bld;
 148    LLVMValueRef inputs[4];
 149    unsigned chan;
 150
 151    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 152    assert(format_desc->block.width == 1);
 153    assert(format_desc->block.height == 1);
 154    assert(format_desc->block.bits <= type.width);
 155    /* FIXME: Support more output types */
 156    assert(type.width == 32);
 157
 158    lp_build_context_init(&bld, gallivm, type);
 159
 160    /* Decode the input vector components */
 161    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 162       const unsigned width = format_desc->channel[chan].size;
 163       const unsigned start = format_desc->channel[chan].shift;
 164       const unsigned stop = start + width;
 165       LLVMValueRef input;
 166
 167       input = packed;
 168
 169       switch(format_desc->channel[chan].type) {
 170       case UTIL_FORMAT_TYPE_VOID:
 171          input = lp_build_undef(gallivm, type);
 172          break;
 173
 174       case UTIL_FORMAT_TYPE_UNSIGNED:
 175          /*
 176           * Align the LSB
 177           */
 178
 179          if (start) {
 180             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 181          }
 182
 183          /*
 184           * Zero the MSBs
 185           */
 186
 187          if (stop < format_desc->block.bits) {
 188             unsigned mask = ((unsigned long long)1 << width) - 1;
 189             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 190          }
 191
 192          /*
 193           * Type conversion
 194           */
 195
 196          if (type.floating) {
 197             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
 198                if (format_desc->swizzle[3] == chan) {
 199                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 200                }
 201                else {
 202                   struct lp_type conv_type = lp_uint_type(type);
 203                   input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
 204                }
 205             }
 206             else {
 207                if(format_desc->channel[chan].normalized)
 208                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 209                else
 210                   input = LLVMBuildSIToFP(builder, input,
 211                                           lp_build_vec_type(gallivm, type), "");
 212             }
 213          }
 214          else if (format_desc->channel[chan].pure_integer) {
 215             /* Nothing to do */
 216          } else {
 217              /* FIXME */
 218              assert(0);
 219          }
 220
 221          break;
 222
 223       case UTIL_FORMAT_TYPE_SIGNED:
 224          /*
 225           * Align the sign bit first.
 226           */
 227
 228          if (stop < type.width) {
 229             unsigned bits = type.width - stop;
 230             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 231             input = LLVMBuildShl(builder, input, bits_val, "");
 232          }
 233
 234          /*
 235           * Align the LSB (with an arithmetic shift to preserve the sign)
 236           */
 237
 238          if (format_desc->channel[chan].size < type.width) {
 239             unsigned bits = type.width - format_desc->channel[chan].size;
 240             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 241             input = LLVMBuildAShr(builder, input, bits_val, "");
 242          }
 243
 244          /*
 245           * Type conversion
 246           */
 247
 248          if (type.floating) {
 249             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 250             if (format_desc->channel[chan].normalized) {
 251                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 252                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 253                input = LLVMBuildFMul(builder, input, scale_val, "");
 254                /* the formula above will produce value below -1.0 for most negative
 255                 * value but everything seems happy with that hence disable for now */
 256                if (0)
 257                   input = lp_build_max(&bld, input,
 258                                        lp_build_const_vec(gallivm, type, -1.0f));
 259             }
 260          }
 261          else if (format_desc->channel[chan].pure_integer) {
 262             /* Nothing to do */
 263          } else {
 264              /* FIXME */
 265              assert(0);
 266          }
 267
 268          break;
 269
 270       case UTIL_FORMAT_TYPE_FLOAT:
 271          if (type.floating) {
 272             if (format_desc->channel[chan].size == 16) {
 273                struct lp_type f16i_type = type;
 274                f16i_type.width /= 2;
 275                f16i_type.floating = 0;
 276                if (start) {
 277                   input = LLVMBuildLShr(builder, input,
 278                              lp_build_const_int_vec(gallivm, type, start), "");
 279                }
 280                input = LLVMBuildTrunc(builder, input,
 281                                       lp_build_vec_type(gallivm, f16i_type), "");
 282                input = lp_build_half_to_float(gallivm, input);
 283             } else {
 284                assert(start == 0);
 285                assert(stop == 32);
 286                assert(type.width == 32);
 287             }
 288             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 289          }
 290          else {
 291             /* FIXME */
 292             assert(0);
 293             input = lp_build_undef(gallivm, type);
 294          }
 295          break;
 296
 297       case UTIL_FORMAT_TYPE_FIXED:
 298          if (type.floating) {
 299             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 300             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 301             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 302             input = LLVMBuildFMul(builder, input, scale_val, "");
 303          }
 304          else {
 305             /* FIXME */
 306             assert(0);
 307             input = lp_build_undef(gallivm, type);
 308          }
 309          break;
 310
 311       default:
 312          assert(0);
 313          input = lp_build_undef(gallivm, type);
 314          break;
 315       }
 316
 317       inputs[chan] = input;
 318    }
 319
 320    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 321 }
 322
 323
 324 /**
 325  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 326  *
 327  * \param dst_type  The desired return type. For pure integer formats
 328  *                  this should be a 32bit wide int or uint vector type,
 329  *                  otherwise a float vector type.
 330  *
 331  * \param packed    The rgba8 values to pack.
 332  *
 333  * \param rgba      The 4 SoA return vectors.
 334  */
 335 void
 336 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 337                            struct lp_type dst_type,
 338                            LLVMValueRef packed,
 339                            LLVMValueRef *rgba)
 340 {
 341    LLVMBuilderRef builder = gallivm->builder;
 342    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 343    unsigned chan;
 344
 345    /* XXX technically shouldn't use that for uint dst_type */
 346    packed = LLVMBuildBitCast(builder, packed,
 347                              lp_build_int_vec_type(gallivm, dst_type), "");
 348
 349    /* Decode the input vector components */
 350    for (chan = 0; chan < 4; ++chan) {
 351 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 352       unsigned start = chan*8;
 353 #else
 354       unsigned start = (3-chan)*8;
 355 #endif
 356       unsigned stop = start + 8;
 357       LLVMValueRef input;
 358
 359       input = packed;
 360
 361       if (start)
 362          input = LLVMBuildLShr(builder, input,
 363                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 364
 365       if (stop < 32)
 366          input = LLVMBuildAnd(builder, input, mask, "");
 367
 368       if (dst_type.floating)
 369          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 370
 371       rgba[chan] = input;
 372    }
 373 }
 374
 375
 376
 377 /**
 378  * Fetch a texels from a texture, returning them in SoA layout.
 379  *
 380  * \param type  the desired return type for 'rgba'.  The vector length
 381  *              is the number of texels to fetch
 382  * \param aligned if the offset is guaranteed to be aligned to element width
 383  *
 384  * \param base_ptr  points to the base of the texture mip tree.
 385  * \param offset    offset to start of the texture image block.  For non-
 386  *                  compressed formats, this simply is an offset to the texel.
 387  *                  For compressed formats, it is an offset to the start of the
 388  *                  compressed data block.
 389  *
 390  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 391  *              these will always be (0,0).  For compressed formats, i will
 392  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 393  * \param cache  optional value pointing to a lp_build_format_cache structure
 394  */
 395 void
 396 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 397                         const struct util_format_description *format_desc,
 398                         struct lp_type type,
 399                         boolean aligned,
 400                         LLVMValueRef base_ptr,
 401                         LLVMValueRef offset,
 402                         LLVMValueRef i,
 403                         LLVMValueRef j,
 404                         LLVMValueRef cache,
 405                         LLVMValueRef rgba_out[4])
 406 {
 407    LLVMBuilderRef builder = gallivm->builder;
 408
 409    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 410        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 411         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
 412         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 413        format_desc->block.width == 1 &&
 414        format_desc->block.height == 1 &&
 415        format_desc->block.bits <= type.width &&
 416        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 417         format_desc->channel[0].size == 32 ||
 418         format_desc->channel[0].size == 16))
 419    {
 420       /*
 421        * The packed pixel fits into an element of the destination format. Put
 422        * the packed pixels into a vector and extract each component for all
 423        * vector elements in parallel.
 424        */
 425
 426       LLVMValueRef packed;
 427
 428       /*
 429        * gather the texels from the texture
 430        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
 431        */
 432       assert(format_desc->block.bits <= type.width);
 433       packed = lp_build_gather(gallivm,
 434                                type.length,
 435                                format_desc->block.bits,
 436                                type.width,
 437                                aligned,
 438                                base_ptr, offset, FALSE);
 439
 440       /*
 441        * convert texels to float rgba
 442        */
 443       lp_build_unpack_rgba_soa(gallivm,
 444                                format_desc,
 445                                type,
 446                                packed, rgba_out);
 447       return;
 448    }
 449
 450    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
 451        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 452       /*
 453        * similar conceptually to above but requiring special
 454        * AoS packed -> SoA float conversion code.
 455        */
 456       LLVMValueRef packed;
 457
 458       assert(type.floating);
 459       assert(type.width == 32);
 460
 461       packed = lp_build_gather(gallivm, type.length,
 462                                format_desc->block.bits,
 463                                type.width, aligned,
 464                                base_ptr, offset, FALSE);
 465       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
 466          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
 467       }
 468       else {
 469          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
 470       }
 471       return;
 472    }
 473
 474    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
 475        format_desc->block.bits == 64) {
 476       /*
 477        * special case the format is 64 bits but we only require
 478        * 32bit (or 8bit) from each block.
 479        */
 480       LLVMValueRef packed;
 481
 482       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
 483          /*
 484           * for stencil simply fix up offsets - could in fact change
 485           * base_ptr instead even outside the shader.
 486           */
 487          unsigned mask = (1 << 8) - 1;
 488          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
 489          offset = LLVMBuildAdd(builder, offset, s_offset, "");
 490          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 491                                   aligned, base_ptr, offset, FALSE);
 492          packed = LLVMBuildAnd(builder, packed,
 493                                lp_build_const_int_vec(gallivm, type, mask), "");
 494       }
 495       else {
 496          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
 497          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 498                                   aligned, base_ptr, offset, TRUE);
 499          packed = LLVMBuildBitCast(builder, packed,
 500                                    lp_build_vec_type(gallivm, type), "");
 501       }
 502       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
 503       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
 504       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
 505       return;
 506    }
 507
 508    /*
 509     * Try calling lp_build_fetch_rgba_aos for all pixels.
 510     */
 511
 512    if (util_format_fits_8unorm(format_desc) &&
 513        type.floating && type.width == 32 &&
 514        (type.length == 1 || (type.length % 4 == 0))) {
 515       struct lp_type tmp_type;
 516       LLVMValueRef tmp;
 517
 518       memset(&tmp_type, 0, sizeof tmp_type);
 519       tmp_type.width = 8;
 520       tmp_type.length = type.length * 4;
 521       tmp_type.norm = TRUE;
 522
 523       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 524                                     aligned, base_ptr, offset, i, j, cache);
 525
 526       lp_build_rgba8_to_fi32_soa(gallivm,
 527                                 type,
 528                                 tmp,
 529                                 rgba_out);
 530
 531       return;
 532    }
 533
 534    if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
 535        /* non-srgb case is already handled above */
 536        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
 537        type.floating && type.width == 32 &&
 538        (type.length == 1 || (type.length % 4 == 0)) &&
 539        cache) {
 540       const struct util_format_description *format_decompressed;
 541       const struct util_format_description *flinear_desc;
 542       LLVMValueRef packed;
 543       flinear_desc = util_format_description(util_format_linear(format_desc->format));
 544       /* This probably only works with aligned data */
 545       packed = lp_build_fetch_cached_texels(gallivm,
 546                                             flinear_desc,
 547                                             type.length,
 548                                             base_ptr,
 549                                             offset,
 550                                             i, j,
 551                                             cache);
 552       packed = LLVMBuildBitCast(builder, packed,
 553                                 lp_build_int_vec_type(gallivm, type), "");
 554       /*
 555        * The values are now packed so they match ordinary srgb RGBA8 format,
 556        * hence need to use matching format for unpack.
 557        */
 558       format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
 559
 560       lp_build_unpack_rgba_soa(gallivm,
 561                                format_decompressed,
 562                                type,
 563                                packed, rgba_out);
 564
 565       return;
 566    }
 567
 568    /*
 569     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 570     *
 571     * This is not the most efficient way of fetching pixels, as we
 572     * miss some opportunities to do vectorization, but this is
 573     * convenient for formats or scenarios for which there was no
 574     * opportunity or incentive to optimize.
 575     */
 576
 577    {
 578       unsigned k;
 579       struct lp_type tmp_type;
 580       LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32];
 581
 582       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 583          debug_printf("%s: AoS fetch fallback for %s\n",
 584                       __FUNCTION__, format_desc->short_name);
 585       }
 586
 587       tmp_type = type;
 588       tmp_type.length = 4;
 589
 590       /*
 591        * Note that vector transpose can be worse compared to insert/extract
 592        * for aos->soa conversion (for formats with 1 or 2 channels). However,
 593        * we should try to avoid getting here for just about all formats, so
 594        * don't bother.
 595        */
 596
 597       /* loop over number of pixels */
 598       for(k = 0; k < type.length; ++k) {
 599          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 600          LLVMValueRef offset_elem;
 601          LLVMValueRef i_elem, j_elem;
 602
 603          offset_elem = LLVMBuildExtractElement(builder, offset,
 604                                                index, "");
 605
 606          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 607          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 608
 609          /* Get a single float[4]={R,G,B,A} pixel */
 610          aos_fetch[k] = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 611                                                 aligned, base_ptr, offset_elem,
 612                                                 i_elem, j_elem, cache);
 613
 614       }
 615       convert_to_soa(gallivm, aos_fetch, rgba_out, type);
 616    }
 617 }