src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42
  43
  44 void
  45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  46                             struct lp_build_context *bld,
  47                             const LLVMValueRef *unswizzled,
  48                             LLVMValueRef swizzled_out[4])
  49 {
  50    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  51    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  52
  53    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  54       enum util_format_swizzle swizzle;
  55       LLVMValueRef depth_or_stencil;
  56
  57       if (util_format_has_stencil(format_desc) &&
  58           !util_format_has_depth(format_desc)) {
  59          assert(!bld->type.floating);
  60          swizzle = format_desc->swizzle[1];
  61       }
  62       else {
  63          assert(bld->type.floating);
  64          swizzle = format_desc->swizzle[0];
  65       }
  66       /*
  67        * Return zzz1 or sss1 for depth-stencil formats here.
  68        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  69        */
  70       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  71
  72       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  73       swizzled_out[3] = bld->one;
  74    }
  75    else {
  76       unsigned chan;
  77       for (chan = 0; chan < 4; ++chan) {
  78          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  79          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  80       }
  81    }
  82 }
  83
  84
  85 /**
  86  * Unpack several pixels in SoA.
  87  *
  88  * It takes a vector of packed pixels:
  89  *
  90  *   packed = {P0, P1, P2, P3, ..., Pn}
  91  *
  92  * And will produce four vectors:
  93  *
  94  *   red    = {R0, R1, R2, R3, ..., Rn}
  95  *   green  = {G0, G1, G2, G3, ..., Gn}
  96  *   blue   = {B0, B1, B2, B3, ..., Bn}
  97  *   alpha  = {A0, A1, A2, A3, ..., An}
  98  *
  99  * It requires that a packed pixel fits into an element of the output
 100  * channels. The common case is when converting pixel with a depth of 32 bit or
 101  * less into floats.
 102  *
 103  * \param format_desc  the format of the 'packed' incoming pixel vector
 104  * \param type  the desired type for rgba_out (type.length = n, above)
 105  * \param packed  the incoming vector of packed pixels
 106  * \param rgba_out  returns the SoA R,G,B,A vectors
 107  */
 108 void
 109 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 110                          const struct util_format_description *format_desc,
 111                          struct lp_type type,
 112                          LLVMValueRef packed,
 113                          LLVMValueRef rgba_out[4])
 114 {
 115    LLVMBuilderRef builder = gallivm->builder;
 116    struct lp_build_context bld;
 117    LLVMValueRef inputs[4];
 118    unsigned chan;
 119
 120    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 121    assert(format_desc->block.width == 1);
 122    assert(format_desc->block.height == 1);
 123    assert(format_desc->block.bits <= type.width);
 124    /* FIXME: Support more output types */
 125    assert(type.width == 32);
 126
 127    lp_build_context_init(&bld, gallivm, type);
 128
 129    /* Decode the input vector components */
 130    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 131       const unsigned width = format_desc->channel[chan].size;
 132       const unsigned start = format_desc->channel[chan].shift;
 133       const unsigned stop = start + width;
 134       LLVMValueRef input;
 135
 136       input = packed;
 137
 138       switch(format_desc->channel[chan].type) {
 139       case UTIL_FORMAT_TYPE_VOID:
 140          input = lp_build_undef(gallivm, type);
 141          break;
 142
 143       case UTIL_FORMAT_TYPE_UNSIGNED:
 144          /*
 145           * Align the LSB
 146           */
 147
 148          if (start) {
 149             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 150          }
 151
 152          /*
 153           * Zero the MSBs
 154           */
 155
 156          if (stop < format_desc->block.bits) {
 157             unsigned mask = ((unsigned long long)1 << width) - 1;
 158             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 159          }
 160
 161          /*
 162           * Type conversion
 163           */
 164
 165          if (type.floating) {
 166             if(format_desc->channel[chan].normalized)
 167                input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 168             else
 169                input = LLVMBuildSIToFP(builder, input,
 170                                        lp_build_vec_type(gallivm, type), "");
 171          }
 172          else if (format_desc->channel[chan].pure_integer) {
 173             /* Nothing to do */
 174          } else {
 175              /* FIXME */
 176              assert(0);
 177          }
 178
 179          break;
 180
 181       case UTIL_FORMAT_TYPE_SIGNED:
 182          /*
 183           * Align the sign bit first.
 184           */
 185
 186          if (stop < type.width) {
 187             unsigned bits = type.width - stop;
 188             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 189             input = LLVMBuildShl(builder, input, bits_val, "");
 190          }
 191
 192          /*
 193           * Align the LSB (with an arithmetic shift to preserve the sign)
 194           */
 195
 196          if (format_desc->channel[chan].size < type.width) {
 197             unsigned bits = type.width - format_desc->channel[chan].size;
 198             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 199             input = LLVMBuildAShr(builder, input, bits_val, "");
 200          }
 201
 202          /*
 203           * Type conversion
 204           */
 205
 206          if (type.floating) {
 207             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 208             if (format_desc->channel[chan].normalized) {
 209                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 210                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 211                input = LLVMBuildFMul(builder, input, scale_val, "");
 212             }
 213          }
 214          else if (format_desc->channel[chan].pure_integer) {
 215             /* Nothing to do */
 216          } else {
 217              /* FIXME */
 218              assert(0);
 219          }
 220
 221          break;
 222
 223       case UTIL_FORMAT_TYPE_FLOAT:
 224          if (type.floating) {
 225             assert(start == 0);
 226             assert(stop == 32);
 227             assert(type.width == 32);
 228             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 229          }
 230          else {
 231             /* FIXME */
 232             assert(0);
 233             input = lp_build_undef(gallivm, type);
 234          }
 235          break;
 236
 237       case UTIL_FORMAT_TYPE_FIXED:
 238          if (type.floating) {
 239             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 240             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 241             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 242             input = LLVMBuildFMul(builder, input, scale_val, "");
 243          }
 244          else {
 245             /* FIXME */
 246             assert(0);
 247             input = lp_build_undef(gallivm, type);
 248          }
 249          break;
 250
 251       default:
 252          assert(0);
 253          input = lp_build_undef(gallivm, type);
 254          break;
 255       }
 256
 257       inputs[chan] = input;
 258    }
 259
 260    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 261 }
 262
 263
 264 /**
 265  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 266  *
 267  * \param dst_type  The desired return type. For pure integer formats
 268  *                  this should be a 32bit wide int or uint vector type,
 269  *                  otherwise a float vector type.
 270  *
 271  * \param packed    The rgba8 values to pack.
 272  *
 273  * \param rgba      The 4 SoA return vectors.
 274  */
 275 void
 276 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 277                            struct lp_type dst_type,
 278                            LLVMValueRef packed,
 279                            LLVMValueRef *rgba)
 280 {
 281    LLVMBuilderRef builder = gallivm->builder;
 282    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 283    unsigned chan;
 284
 285    /* XXX technically shouldn't use that for uint dst_type */
 286    packed = LLVMBuildBitCast(builder, packed,
 287                              lp_build_int_vec_type(gallivm, dst_type), "");
 288
 289    /* Decode the input vector components */
 290    for (chan = 0; chan < 4; ++chan) {
 291 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 292       unsigned start = chan*8;
 293 #else
 294       unsigned start = (3-chan)*8;
 295 #endif
 296       unsigned stop = start + 8;
 297       LLVMValueRef input;
 298
 299       input = packed;
 300
 301       if (start)
 302          input = LLVMBuildLShr(builder, input,
 303                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 304
 305       if (stop < 32)
 306          input = LLVMBuildAnd(builder, input, mask, "");
 307
 308       if (dst_type.floating)
 309          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 310
 311       rgba[chan] = input;
 312    }
 313 }
 314
 315
 316
 317 /**
 318  * Fetch a texels from a texture, returning them in SoA layout.
 319  *
 320  * \param type  the desired return type for 'rgba'.  The vector length
 321  *              is the number of texels to fetch
 322  *
 323  * \param base_ptr  points to the base of the texture mip tree.
 324  * \param offset    offset to start of the texture image block.  For non-
 325  *                  compressed formats, this simply is an offset to the texel.
 326  *                  For compressed formats, it is an offset to the start of the
 327  *                  compressed data block.
 328  *
 329  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 330  *              these will always be (0,0).  For compressed formats, i will
 331  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 332  */
 333 void
 334 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 335                         const struct util_format_description *format_desc,
 336                         struct lp_type type,
 337                         LLVMValueRef base_ptr,
 338                         LLVMValueRef offset,
 339                         LLVMValueRef i,
 340                         LLVMValueRef j,
 341                         LLVMValueRef rgba_out[4])
 342 {
 343    LLVMBuilderRef builder = gallivm->builder;
 344
 345    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 346        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 347         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 348        format_desc->block.width == 1 &&
 349        format_desc->block.height == 1 &&
 350        format_desc->block.bits <= type.width &&
 351        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 352         format_desc->channel[0].size == 32))
 353    {
 354       /*
 355        * The packed pixel fits into an element of the destination format. Put
 356        * the packed pixels into a vector and extract each component for all
 357        * vector elements in parallel.
 358        */
 359
 360       LLVMValueRef packed;
 361
 362       /*
 363        * gather the texels from the texture
 364        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
 365        */
 366       assert(format_desc->block.bits <= type.width);
 367       packed = lp_build_gather(gallivm,
 368                                type.length,
 369                                format_desc->block.bits,
 370                                type.width,
 371                                base_ptr, offset, FALSE);
 372
 373       /*
 374        * convert texels to float rgba
 375        */
 376       lp_build_unpack_rgba_soa(gallivm,
 377                                format_desc,
 378                                type,
 379                                packed, rgba_out);
 380       return;
 381    }
 382
 383    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
 384        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
 385       /*
 386        * similar conceptually to above but requiring special
 387        * AoS packed -> SoA float conversion code.
 388        */
 389       LLVMValueRef packed;
 390
 391       assert(type.floating);
 392       assert(type.width == 32);
 393
 394       packed = lp_build_gather(gallivm, type.length,
 395                                format_desc->block.bits,
 396                                type.width, base_ptr, offset,
 397                                FALSE);
 398       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
 399          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
 400       }
 401       else {
 402          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
 403       }
 404       return;
 405    }
 406
 407    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
 408        format_desc->block.bits == 64) {
 409       /*
 410        * special case the format is 64 bits but we only require
 411        * 32bit (or 8bit) from each block.
 412        */
 413       LLVMValueRef packed;
 414
 415       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
 416          /*
 417           * for stencil simply fix up offsets - could in fact change
 418           * base_ptr instead even outside the shader.
 419           */
 420          unsigned mask = (1 << 8) - 1;
 421          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
 422          offset = LLVMBuildAdd(builder, offset, s_offset, "");
 423          packed = lp_build_gather(gallivm, type.length,
 424                                   32, type.width, base_ptr, offset, FALSE);
 425          packed = LLVMBuildAnd(builder, packed,
 426                                lp_build_const_int_vec(gallivm, type, mask), "");
 427       }
 428       else {
 429          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
 430          packed = lp_build_gather(gallivm, type.length,
 431                                   32, type.width, base_ptr, offset, TRUE);
 432          packed = LLVMBuildBitCast(builder, packed,
 433                                    lp_build_vec_type(gallivm, type), "");
 434       }
 435       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
 436       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
 437       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
 438       return;
 439    }
 440
 441    /*
 442     * Try calling lp_build_fetch_rgba_aos for all pixels.
 443     */
 444
 445    if (util_format_fits_8unorm(format_desc) &&
 446        type.floating && type.width == 32 &&
 447        (type.length == 1 || (type.length % 4 == 0))) {
 448       struct lp_type tmp_type;
 449       LLVMValueRef tmp;
 450
 451       memset(&tmp_type, 0, sizeof tmp_type);
 452       tmp_type.width = 8;
 453       tmp_type.length = type.length * 4;
 454       tmp_type.norm = TRUE;
 455
 456       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 457                                     base_ptr, offset, i, j);
 458
 459       lp_build_rgba8_to_fi32_soa(gallivm,
 460                                 type,
 461                                 tmp,
 462                                 rgba_out);
 463
 464       return;
 465    }
 466
 467    /*
 468     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 469     *
 470     * This is not the most efficient way of fetching pixels, as we
 471     * miss some opportunities to do vectorization, but this is
 472     * convenient for formats or scenarios for which there was no
 473     * opportunity or incentive to optimize.
 474     */
 475
 476    {
 477       unsigned k, chan;
 478       struct lp_type tmp_type;
 479
 480       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 481          debug_printf("%s: scalar unpacking of %s\n",
 482                       __FUNCTION__, format_desc->short_name);
 483       }
 484
 485       tmp_type = type;
 486       tmp_type.length = 4;
 487
 488       for (chan = 0; chan < 4; ++chan) {
 489          rgba_out[chan] = lp_build_undef(gallivm, type);
 490       }
 491
 492       /* loop over number of pixels */
 493       for(k = 0; k < type.length; ++k) {
 494          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 495          LLVMValueRef offset_elem;
 496          LLVMValueRef i_elem, j_elem;
 497          LLVMValueRef tmp;
 498
 499          offset_elem = LLVMBuildExtractElement(builder, offset,
 500                                                index, "");
 501
 502          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 503          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 504
 505          /* Get a single float[4]={R,G,B,A} pixel */
 506          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 507                                        base_ptr, offset_elem,
 508                                        i_elem, j_elem);
 509
 510          /*
 511           * Insert the AoS tmp value channels into the SoA result vectors at
 512           * position = 'index'.
 513           */
 514          for (chan = 0; chan < 4; ++chan) {
 515             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 516             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 517             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 518                                                     tmp_chan, index, "");
 519          }
 520       }
 521    }
 522 }