src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_debug.h"
  41 #include "lp_bld_format.h"
  42
  43
  44 void
  45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  46                             struct lp_build_context *bld,
  47                             const LLVMValueRef *unswizzled,
  48                             LLVMValueRef swizzled_out[4])
  49 {
  50    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  51    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  52
  53    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  54       /*
  55        * Return zzz1 for depth-stencil formats.
  56        *
  57        * XXX: Allow to control the depth swizzle with an additional parameter,
  58        * as the caller may wish another depth swizzle, or retain the stencil
  59        * value.
  60        */
  61       enum util_format_swizzle swizzle = format_desc->swizzle[0];
  62       LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  63       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
  64       swizzled_out[3] = bld->one;
  65    }
  66    else {
  67       unsigned chan;
  68       for (chan = 0; chan < 4; ++chan) {
  69          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  70          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  71       }
  72    }
  73 }
  74
  75
  76 /**
  77  * Unpack several pixels in SoA.
  78  *
  79  * It takes a vector of packed pixels:
  80  *
  81  *   packed = {P0, P1, P2, P3, ..., Pn}
  82  *
  83  * And will produce four vectors:
  84  *
  85  *   red    = {R0, R1, R2, R3, ..., Rn}
  86  *   green  = {G0, G1, G2, G3, ..., Gn}
  87  *   blue   = {B0, B1, B2, B3, ..., Bn}
  88  *   alpha  = {A0, A1, A2, A3, ..., An}
  89  *
  90  * It requires that a packed pixel fits into an element of the output
  91  * channels. The common case is when converting pixel with a depth of 32 bit or
  92  * less into floats.
  93  *
  94  * \param format_desc  the format of the 'packed' incoming pixel vector
  95  * \param type  the desired type for rgba_out (type.length = n, above)
  96  * \param packed  the incoming vector of packed pixels
  97  * \param rgba_out  returns the SoA R,G,B,A vectors
  98  */
  99 void
 100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 101                          const struct util_format_description *format_desc,
 102                          struct lp_type type,
 103                          LLVMValueRef packed,
 104                          LLVMValueRef rgba_out[4])
 105 {
 106    LLVMBuilderRef builder = gallivm->builder;
 107    struct lp_build_context bld;
 108    LLVMValueRef inputs[4];
 109    unsigned start;
 110    unsigned chan;
 111
 112    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 113    assert(format_desc->block.width == 1);
 114    assert(format_desc->block.height == 1);
 115    assert(format_desc->block.bits <= type.width);
 116    /* FIXME: Support more output types */
 117    assert(type.width == 32);
 118
 119    lp_build_context_init(&bld, gallivm, type);
 120
 121    /* Decode the input vector components */
 122    start = 0;
 123    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 124       const unsigned width = format_desc->channel[chan].size;
 125       const unsigned stop = start + width;
 126       LLVMValueRef input;
 127
 128       input = packed;
 129
 130       switch(format_desc->channel[chan].type) {
 131       case UTIL_FORMAT_TYPE_VOID:
 132          input = lp_build_undef(gallivm, type);
 133          break;
 134
 135       case UTIL_FORMAT_TYPE_UNSIGNED:
 136          /*
 137           * Align the LSB
 138           */
 139
 140          if (start) {
 141             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
 142          }
 143
 144          /*
 145           * Zero the MSBs
 146           */
 147
 148          if (stop < format_desc->block.bits) {
 149             unsigned mask = ((unsigned long long)1 << width) - 1;
 150             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
 151          }
 152
 153          /*
 154           * Type conversion
 155           */
 156
 157          if (type.floating) {
 158             if(format_desc->channel[chan].normalized)
 159                input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
 160             else
 161                input = LLVMBuildSIToFP(builder, input,
 162                                        lp_build_vec_type(gallivm, type), "");
 163          }
 164          else if (format_desc->channel[chan].pure_integer) {
 165             /* Nothing to do */
 166          } else {
 167              /* FIXME */
 168              assert(0);
 169          }
 170
 171          break;
 172
 173       case UTIL_FORMAT_TYPE_SIGNED:
 174          /*
 175           * Align the sign bit first.
 176           */
 177
 178          if (stop < type.width) {
 179             unsigned bits = type.width - stop;
 180             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 181             input = LLVMBuildShl(builder, input, bits_val, "");
 182          }
 183
 184          /*
 185           * Align the LSB (with an arithmetic shift to preserve the sign)
 186           */
 187
 188          if (format_desc->channel[chan].size < type.width) {
 189             unsigned bits = type.width - format_desc->channel[chan].size;
 190             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
 191             input = LLVMBuildAShr(builder, input, bits_val, "");
 192          }
 193
 194          /*
 195           * Type conversion
 196           */
 197
 198          if (type.floating) {
 199             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 200             if (format_desc->channel[chan].normalized) {
 201                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 202                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 203                input = LLVMBuildFMul(builder, input, scale_val, "");
 204             }
 205          }
 206          else if (format_desc->channel[chan].pure_integer) {
 207             /* Nothing to do */
 208          } else {
 209              /* FIXME */
 210              assert(0);
 211          }
 212
 213          break;
 214
 215       case UTIL_FORMAT_TYPE_FLOAT:
 216          if (type.floating) {
 217             assert(start == 0);
 218             assert(stop == 32);
 219             assert(type.width == 32);
 220             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
 221          }
 222          else {
 223             /* FIXME */
 224             assert(0);
 225             input = lp_build_undef(gallivm, type);
 226          }
 227          break;
 228
 229       case UTIL_FORMAT_TYPE_FIXED:
 230          if (type.floating) {
 231             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 232             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
 233             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
 234             input = LLVMBuildFMul(builder, input, scale_val, "");
 235          }
 236          else {
 237             /* FIXME */
 238             assert(0);
 239             input = lp_build_undef(gallivm, type);
 240          }
 241          break;
 242
 243       default:
 244          assert(0);
 245          input = lp_build_undef(gallivm, type);
 246          break;
 247       }
 248
 249       inputs[chan] = input;
 250
 251       start = stop;
 252    }
 253
 254    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 255 }
 256
 257
 258 /**
 259  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
 260  *
 261  * \param dst_type  The desired return type. For pure integer formats
 262  *                  this should be a 32bit wide int or uint vector type,
 263  *                  otherwise a float vector type.
 264  *
 265  * \param packed    The rgba8 values to pack.
 266  *
 267  * \param rgba      The 4 SoA return vectors.
 268  */
 269 void
 270 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 271                            struct lp_type dst_type,
 272                            LLVMValueRef packed,
 273                            LLVMValueRef *rgba)
 274 {
 275    LLVMBuilderRef builder = gallivm->builder;
 276    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
 277    unsigned chan;
 278
 279    /* XXX technically shouldn't use that for uint dst_type */
 280    packed = LLVMBuildBitCast(builder, packed,
 281                              lp_build_int_vec_type(gallivm, dst_type), "");
 282
 283    /* Decode the input vector components */
 284    for (chan = 0; chan < 4; ++chan) {
 285       unsigned start = chan*8;
 286       unsigned stop = start + 8;
 287       LLVMValueRef input;
 288
 289       input = packed;
 290
 291       if (start)
 292          input = LLVMBuildLShr(builder, input,
 293                                lp_build_const_int_vec(gallivm, dst_type, start), "");
 294
 295       if (stop < 32)
 296          input = LLVMBuildAnd(builder, input, mask, "");
 297
 298       if (dst_type.floating)
 299          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
 300
 301       rgba[chan] = input;
 302    }
 303 }
 304
 305
 306
 307 /**
 308  * Fetch a texels from a texture, returning them in SoA layout.
 309  *
 310  * \param type  the desired return type for 'rgba'.  The vector length
 311  *              is the number of texels to fetch
 312  *
 313  * \param base_ptr  points to start of the texture image block.  For non-
 314  *                  compressed formats, this simply points to the texel.
 315  *                  For compressed formats, it points to the start of the
 316  *                  compressed data block.
 317  *
 318  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 319  *              these will always be (0,0).  For compressed formats, i will
 320  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 321  */
 322 void
 323 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 324                         const struct util_format_description *format_desc,
 325                         struct lp_type type,
 326                         LLVMValueRef base_ptr,
 327                         LLVMValueRef offset,
 328                         LLVMValueRef i,
 329                         LLVMValueRef j,
 330                         LLVMValueRef rgba_out[4])
 331 {
 332    LLVMBuilderRef builder = gallivm->builder;
 333
 334    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 335        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 336         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 337        format_desc->block.width == 1 &&
 338        format_desc->block.height == 1 &&
 339        format_desc->block.bits <= type.width &&
 340        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 341         format_desc->channel[0].size == 32))
 342    {
 343       /*
 344        * The packed pixel fits into an element of the destination format. Put
 345        * the packed pixels into a vector and extract each component for all
 346        * vector elements in parallel.
 347        */
 348
 349       LLVMValueRef packed;
 350
 351       /*
 352        * gather the texels from the texture
 353        * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
 354        */
 355       packed = lp_build_gather(gallivm,
 356                                type.length,
 357                                format_desc->block.bits,
 358                                type.width,
 359                                base_ptr, offset);
 360
 361       /*
 362        * convert texels to float rgba
 363        */
 364       lp_build_unpack_rgba_soa(gallivm,
 365                                format_desc,
 366                                type,
 367                                packed, rgba_out);
 368       return;
 369    }
 370
 371    /*
 372     * Try calling lp_build_fetch_rgba_aos for all pixels.
 373     */
 374
 375    if (util_format_fits_8unorm(format_desc) &&
 376        type.floating && type.width == 32 &&
 377        (type.length == 1 || (type.length % 4 == 0))) {
 378       struct lp_type tmp_type;
 379       LLVMValueRef tmp;
 380
 381       memset(&tmp_type, 0, sizeof tmp_type);
 382       tmp_type.width = 8;
 383       tmp_type.length = type.length * 4;
 384       tmp_type.norm = TRUE;
 385
 386       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 387                                     base_ptr, offset, i, j);
 388
 389       lp_build_rgba8_to_fi32_soa(gallivm,
 390                                 type,
 391                                 tmp,
 392                                 rgba_out);
 393
 394       return;
 395    }
 396
 397    /*
 398     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 399     *
 400     * This is not the most efficient way of fetching pixels, as we
 401     * miss some opportunities to do vectorization, but this is
 402     * convenient for formats or scenarios for which there was no
 403     * opportunity or incentive to optimize.
 404     */
 405
 406    {
 407       unsigned k, chan;
 408       struct lp_type tmp_type;
 409
 410       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 411          debug_printf("%s: scalar unpacking of %s\n",
 412                       __FUNCTION__, format_desc->short_name);
 413       }
 414
 415       tmp_type = type;
 416       tmp_type.length = 4;
 417
 418       for (chan = 0; chan < 4; ++chan) {
 419          rgba_out[chan] = lp_build_undef(gallivm, type);
 420       }
 421
 422       /* loop over number of pixels */
 423       for(k = 0; k < type.length; ++k) {
 424          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 425          LLVMValueRef offset_elem;
 426          LLVMValueRef i_elem, j_elem;
 427          LLVMValueRef tmp;
 428
 429          offset_elem = LLVMBuildExtractElement(builder, offset,
 430                                                index, "");
 431
 432          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 433          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 434
 435          /* Get a single float[4]={R,G,B,A} pixel */
 436          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
 437                                        base_ptr, offset_elem,
 438                                        i_elem, j_elem);
 439
 440          /*
 441           * Insert the AoS tmp value channels into the SoA result vectors at
 442           * position = 'index'.
 443           */
 444          for (chan = 0; chan < 4; ++chan) {
 445             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
 446             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 447             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 448                                                     tmp_chan, index, "");
 449          }
 450       }
 451    }
 452 }