src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42 #include "lp_bld_arit.h"
  43
  44
  45 void
  46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  47                             struct lp_build_context *bld,
  48                             const LLVMValueRef *unswizzled,
  49                             LLVMValueRef swizzled_out[4])
  50 {
  51    assert(PIPE_SWIZZLE_0 == (int)PIPE_SWIZZLE_0);
  52    assert(PIPE_SWIZZLE_1 == (int)PIPE_SWIZZLE_1);
  53
  54    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  55       enum pipe_swizzle swizzle;
  56       LLVMValueRef depth_or_stencil;
  57
  58       if (util_format_has_stencil(format_desc) &&
  59           !util_format_has_depth(format_desc)) {
  60          assert(!bld->type.floating);
  61          swizzle = format_desc->swizzle[1];
  62       }
  63       else {
  64          assert(bld->type.floating);
  65          swizzle = format_desc->swizzle[0];
  66       }
  67       /*
  68        * Return zzz1 or sss1 for depth-stencil formats here.
  69        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  70        */
  71       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  72
  73       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  74       swizzled_out[3] = bld->one;
  75    }
  76    else {
  77       unsigned chan;
  78       for (chan = 0; chan < 4; ++chan) {
  79          enum pipe_swizzle swizzle = format_desc->swizzle[chan];
  80          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  81       }
  82    }
  83 }
  84
  85
  86 /**
  87  * Unpack several pixels in SoA.
  88  *
  89  * It takes a vector of packed pixels:
  90  *
  91  *   packed = {P0, P1, P2, P3, ..., Pn}
  92  *
  93  * And will produce four vectors:
  94  *
  95  *   red    = {R0, R1, R2, R3, ..., Rn}
  96  *   green  = {G0, G1, G2, G3, ..., Gn}
  97  *   blue   = {B0, B1, B2, B3, ..., Bn}
  98  *   alpha  = {A0, A1, A2, A3, ..., An}
  99  *
 100  * It requires that a packed pixel fits into an element of the output
 101  * channels. The common case is when converting pixel with a depth of 32 bit or
 102  * less into floats.
 103  *
 104  * \param format_desc  the format of the 'packed' incoming pixel vector
 105  * \param type  the desired type for rgba_out (type.length = n, above)
 106  * \param packed  the incoming vector of packed pixels
 107  * \param rgba_out  returns the SoA R,G,B,A vectors
 108  */
 109 void
 110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 111                          const struct util_format_description *format_desc,
 112                          struct lp_type type,
 113                          LLVMValueRef packed,
 114                          LLVMValueRef rgba_out[4])
 115 {
 116    LLVMBuilderRef builder = gallivm->builder;
 117    struct lp_build_context bld;
 118    LLVMValueRef inputs[4];
 119    unsigned chan;
 120
 121    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 122    assert(format_desc->block.width == 1);
 123    assert(format_desc->block.height == 1);
 124    assert(format_desc->block.bits <= type.width);
 125    /* FIXME: Support more output types */
 126    assert(type.width == 32);
 127
 128    lp_build_context_init(&bld, gallivm, type);
 129
 130    /* Decode the input vector components */
 131    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 132       const unsigned width = format_desc->channel[chan].size;
 133       const unsigned start = format_desc->channel[chan].shift;
 134       const unsigned stop = start + width;
 135       LLVMValueRef input;
 136
 137       input = packed;
 138
 139       switch(format_desc->channel[chan].type) {
 140       case UTIL_FORMAT_TYPE_VOID:
 141          input = lp_build_undef(gallivm, type);
 142          break;
 143
 144       case UTIL_FORMAT_TYPE_UNSIGNED:
 145          /*
 146           * Align the LSB
 147           */
 148
 149          if (start) {
 150             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 151          }
 152
 153          /*
 154           * Zero the MSBs
 155           */
 156
 157          if (stop < format_desc->block.bits) {
 158             unsigned mask = ((unsigned long long)1 << width) - 1;
 159             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 160          }
 161
 162          /*
 163           * Type conversion
 164           */
 165
 166          if (type.floating) {
 167             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
 168                if (format_desc->swizzle[3] == chan) {
 169                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 170                }
 171                else {
 172                   struct lp_type conv_type = lp_uint_type(type);
 173                   input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
 174                }
 175             }
 176             else {
 177                if(format_desc->channel[chan].normalized)
 178                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 179                else
 180                   input = LLVMBuildSIToFP(builder, input,
 181                                           lp_build_vec_type(gallivm, type), "");
 182             }
 183          }
 184          else if (format_desc->channel[chan].pure_integer) {
 185             /* Nothing to do */
 186          } else {
 187              /* FIXME */
 188              assert(0);
 189          }
 190
 191          break;
 192
 193       case UTIL_FORMAT_TYPE_SIGNED:
 194          /*
 195           * Align the sign bit first.
 196           */
 197
 198          if (stop < type.width) {
 199             unsigned bits = type.width - stop;
 200             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 201             input = LLVMBuildShl(builder, input, bits_val, "");
 202          }
 203
 204          /*
 205           * Align the LSB (with an arithmetic shift to preserve the sign)
 206           */
 207
 208          if (format_desc->channel[chan].size < type.width) {
 209             unsigned bits = type.width - format_desc->channel[chan].size;
 210             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 211             input = LLVMBuildAShr(builder, input, bits_val, "");
 212          }
 213
 214          /*
 215           * Type conversion
 216           */
 217
 218          if (type.floating) {
 219             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 220             if (format_desc->channel[chan].normalized) {
 221                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 222                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 223                input = LLVMBuildFMul(builder, input, scale_val, "");
 224                /* the formula above will produce value below -1.0 for most negative
 225                 * value but everything seems happy with that hence disable for now */
 226                if (0)
 227                   input = lp_build_max(&bld, input,
 228                                        lp_build_const_vec(gallivm, type, -1.0f));
 229             }
 230          }
 231          else if (format_desc->channel[chan].pure_integer) {
 232             /* Nothing to do */
 233          } else {
 234              /* FIXME */
 235              assert(0);
 236          }
 237
 238          break;
 239
 240       case UTIL_FORMAT_TYPE_FLOAT:
 241          if (type.floating) {
 242             if (format_desc->channel[chan].size == 16) {
 243                struct lp_type f16i_type = type;
 244                f16i_type.width /= 2;
 245                f16i_type.floating = 0;
 246                if (start) {
 247                   input = LLVMBuildLShr(builder, input,
 248                              lp_build_const_int_vec(gallivm, type, start), "");
 249                }
 250                input = LLVMBuildTrunc(builder, input,
 251                                       lp_build_vec_type(gallivm, f16i_type), "");
 252                input = lp_build_half_to_float(gallivm, input);
 253             } else {
 254                assert(start == 0);
 255                assert(stop == 32);
 256                assert(type.width == 32);
 257             }
 258             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 259          }
 260          else {
 261             /* FIXME */
 262             assert(0);
 263             input = lp_build_undef(gallivm, type);
 264          }
 265          break;
 266
 267       case UTIL_FORMAT_TYPE_FIXED:
 268          if (type.floating) {
 269             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 270             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 271             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 272             input = LLVMBuildFMul(builder, input, scale_val, "");
 273          }
 274          else {
 275             /* FIXME */
 276             assert(0);
 277             input = lp_build_undef(gallivm, type);
 278          }
 279          break;
 280
 281       default:
 282          assert(0);
 283          input = lp_build_undef(gallivm, type);
 284          break;
 285       }
 286
 287       inputs[chan] = input;
 288    }
 289
 290    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 291 }
 292
 293
 294 /**
 295  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 296  *
 297  * \param dst_type  The desired return type. For pure integer formats
 298  *                  this should be a 32bit wide int or uint vector type,
 299  *                  otherwise a float vector type.
 300  *
 301  * \param packed    The rgba8 values to pack.
 302  *
 303  * \param rgba      The 4 SoA return vectors.
 304  */
 305 void
 306 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 307                            struct lp_type dst_type,
 308                            LLVMValueRef packed,
 309                            LLVMValueRef *rgba)
 310 {
 311    LLVMBuilderRef builder = gallivm->builder;
 312    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 313    unsigned chan;
 314
 315    /* XXX technically shouldn't use that for uint dst_type */
 316    packed = LLVMBuildBitCast(builder, packed,
 317                              lp_build_int_vec_type(gallivm, dst_type), "");
 318
 319    /* Decode the input vector components */
 320    for (chan = 0; chan < 4; ++chan) {
 321 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 322       unsigned start = chan*8;
 323 #else
 324       unsigned start = (3-chan)*8;
 325 #endif
 326       unsigned stop = start + 8;
 327       LLVMValueRef input;
 328
 329       input = packed;
 330
 331       if (start)
 332          input = LLVMBuildLShr(builder, input,
 333                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 334
 335       if (stop < 32)
 336          input = LLVMBuildAnd(builder, input, mask, "");
 337
 338       if (dst_type.floating)
 339          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 340
 341       rgba[chan] = input;
 342    }
 343 }
 344
 345
 346
 347 /**
 348  * Fetch a texels from a texture, returning them in SoA layout.
 349  *
 350  * \param type  the desired return type for 'rgba'.  The vector length
 351  *              is the number of texels to fetch
 352  *
 353  * \param base_ptr  points to the base of the texture mip tree.
 354  * \param offset    offset to start of the texture image block.  For non-
 355  *                  compressed formats, this simply is an offset to the texel.
 356  *                  For compressed formats, it is an offset to the start of the
 357  *                  compressed data block.
 358  *
 359  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 360  *              these will always be (0,0).  For compressed formats, i will
 361  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 362  * \param cache  optional value pointing to a lp_build_format_cache structure
 363  */
 364 void
 365 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 366                         const struct util_format_description *format_desc,
 367                         struct lp_type type,
 368                         LLVMValueRef base_ptr,
 369                         LLVMValueRef offset,
 370                         LLVMValueRef i,
 371                         LLVMValueRef j,
 372                         LLVMValueRef cache,
 373                         LLVMValueRef rgba_out[4])
 374 {
 375    LLVMBuilderRef builder = gallivm->builder;
 376
 377    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 378        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 379         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
 380         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 381        format_desc->block.width == 1 &&
 382        format_desc->block.height == 1 &&
 383        format_desc->block.bits <= type.width &&
 384        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 385         format_desc->channel[0].size == 32 ||
 386         format_desc->channel[0].size == 16))
 387    {
 388       /*
 389        * The packed pixel fits into an element of the destination format. Put
 390        * the packed pixels into a vector and extract each component for all
 391        * vector elements in parallel.
 392        */
 393
 394       LLVMValueRef packed;
 395
 396       /*
 397        * gather the texels from the texture
 398        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
 399        */
 400       assert(format_desc->block.bits <= type.width);
 401       packed = lp_build_gather(gallivm,
 402                                type.length,
 403                                format_desc->block.bits,
 404                                type.width,
 405                                TRUE,
 406                                base_ptr, offset, FALSE);
 407
 408       /*
 409        * convert texels to float rgba
 410        */
 411       lp_build_unpack_rgba_soa(gallivm,
 412                                format_desc,
 413                                type,
 414                                packed, rgba_out);
 415       return;
 416    }
 417
 418    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
 419        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 420       /*
 421        * similar conceptually to above but requiring special
 422        * AoS packed -> SoA float conversion code.
 423        */
 424       LLVMValueRef packed;
 425
 426       assert(type.floating);
 427       assert(type.width == 32);
 428
 429       packed = lp_build_gather(gallivm, type.length,
 430                                format_desc->block.bits,
 431                                type.width, TRUE,
 432                                base_ptr, offset, FALSE);
 433       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
 434          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
 435       }
 436       else {
 437          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
 438       }
 439       return;
 440    }
 441
 442    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
 443        format_desc->block.bits == 64) {
 444       /*
 445        * special case the format is 64 bits but we only require
 446        * 32bit (or 8bit) from each block.
 447        */
 448       LLVMValueRef packed;
 449
 450       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
 451          /*
 452           * for stencil simply fix up offsets - could in fact change
 453           * base_ptr instead even outside the shader.
 454           */
 455          unsigned mask = (1 << 8) - 1;
 456          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
 457          offset = LLVMBuildAdd(builder, offset, s_offset, "");
 458          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 459                                   TRUE, base_ptr, offset, FALSE);
 460          packed = LLVMBuildAnd(builder, packed,
 461                                lp_build_const_int_vec(gallivm, type, mask), "");
 462       }
 463       else {
 464          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
 465          packed = lp_build_gather(gallivm, type.length, 32, type.width,
 466                                   TRUE, base_ptr, offset, TRUE);
 467          packed = LLVMBuildBitCast(builder, packed,
 468                                    lp_build_vec_type(gallivm, type), "");
 469       }
 470       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
 471       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
 472       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
 473       return;
 474    }
 475
 476    /*
 477     * Try calling lp_build_fetch_rgba_aos for all pixels.
 478     */
 479
 480    if (util_format_fits_8unorm(format_desc) &&
 481        type.floating && type.width == 32 &&
 482        (type.length == 1 || (type.length % 4 == 0))) {
 483       struct lp_type tmp_type;
 484       LLVMValueRef tmp;
 485
 486       memset(&tmp_type, 0, sizeof tmp_type);
 487       tmp_type.width = 8;
 488       tmp_type.length = type.length * 4;
 489       tmp_type.norm = TRUE;
 490
 491       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 492                                     TRUE, base_ptr, offset, i, j, cache);
 493
 494       lp_build_rgba8_to_fi32_soa(gallivm,
 495                                 type,
 496                                 tmp,
 497                                 rgba_out);
 498
 499       return;
 500    }
 501
 502    if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
 503        /* non-srgb case is already handled above */
 504        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
 505        type.floating && type.width == 32 &&
 506        (type.length == 1 || (type.length % 4 == 0)) &&
 507        cache) {
 508       const struct util_format_description *format_decompressed;
 509       const struct util_format_description *flinear_desc;
 510       LLVMValueRef packed;
 511       flinear_desc = util_format_description(util_format_linear(format_desc->format));
 512       packed = lp_build_fetch_cached_texels(gallivm,
 513                                             flinear_desc,
 514                                             type.length,
 515                                             base_ptr,
 516                                             offset,
 517                                             i, j,
 518                                             cache);
 519       packed = LLVMBuildBitCast(builder, packed,
 520                                 lp_build_int_vec_type(gallivm, type), "");
 521       /*
 522        * The values are now packed so they match ordinary srgb RGBA8 format,
 523        * hence need to use matching format for unpack.
 524        */
 525       format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
 526
 527       lp_build_unpack_rgba_soa(gallivm,
 528                                format_decompressed,
 529                                type,
 530                                packed, rgba_out);
 531
 532       return;
 533    }
 534
 535    /*
 536     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 537     *
 538     * This is not the most efficient way of fetching pixels, as we
 539     * miss some opportunities to do vectorization, but this is
 540     * convenient for formats or scenarios for which there was no
 541     * opportunity or incentive to optimize.
 542     */
 543
 544    {
 545       unsigned k, chan;
 546       struct lp_type tmp_type;
 547
 548       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 549          debug_printf("%s: scalar unpacking of %s\n",
 550                       __FUNCTION__, format_desc->short_name);
 551       }
 552
 553       tmp_type = type;
 554       tmp_type.length = 4;
 555
 556       for (chan = 0; chan < 4; ++chan) {
 557          rgba_out[chan] = lp_build_undef(gallivm, type);
 558       }
 559
 560       /* loop over number of pixels */
 561       for(k = 0; k < type.length; ++k) {
 562          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 563          LLVMValueRef offset_elem;
 564          LLVMValueRef i_elem, j_elem;
 565          LLVMValueRef tmp;
 566
 567          offset_elem = LLVMBuildExtractElement(builder, offset,
 568                                                index, "");
 569
 570          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 571          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 572
 573          /* Get a single float[4]={R,G,B,A} pixel */
 574          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 575                                        TRUE, base_ptr, offset_elem,
 576                                        i_elem, j_elem, cache);
 577
 578          /*
 579           * Insert the AoS tmp value channels into the SoA result vectors at
 580           * position = 'index'.
 581           */
 582          for (chan = 0; chan < 4; ++chan) {
 583             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 584             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 585             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 586                                                     tmp_chan, index, "");
 587          }
 588       }
 589    }
 590 }