src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_defines.h"
  30
  31 #include "util/u_format.h"
  32 #include "util/u_memory.h"
  33 #include "util/u_string.h"
  34
  35 #include "lp_bld_type.h"
  36 #include "lp_bld_const.h"
  37 #include "lp_bld_conv.h"
  38 #include "lp_bld_swizzle.h"
  39 #include "lp_bld_gather.h"
  40 #include "lp_bld_format.h"
  41
  42
  43 void
  44 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  45                             struct lp_build_context *bld,
  46                             const LLVMValueRef *unswizzled,
  47                             LLVMValueRef swizzled_out[4])
  48 {
  49    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  50    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  51
  52    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  53       /*
  54        * Return zzz1 for depth-stencil formats.
  55        *
  56        * XXX: Allow to control the depth swizzle with an additional parameter,
  57        * as the caller may wish another depth swizzle, or retain the stencil
  58        * value.
  59        */
  60       enum util_format_swizzle swizzle = format_desc->swizzle[0];
  61       LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  62       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
  63       swizzled_out[3] = bld->one;
  64    }
  65    else {
  66       unsigned chan;
  67       for (chan = 0; chan < 4; ++chan) {
  68          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  69          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  70       }
  71    }
  72 }
  73
  74
  75 /**
  76  * Unpack several pixels in SoA.
  77  *
  78  * It takes a vector of packed pixels:
  79  *
  80  *   packed = {P0, P1, P2, P3, ..., Pn}
  81  *
  82  * And will produce four vectors:
  83  *
  84  *   red    = {R0, R1, R2, R3, ..., Rn}
  85  *   green  = {G0, G1, G2, G3, ..., Gn}
  86  *   blue   = {B0, B1, B2, B3, ..., Bn}
  87  *   alpha  = {A0, A1, A2, A3, ..., An}
  88  *
  89  * It requires that a packed pixel fits into an element of the output
  90  * channels. The common case is when converting pixel with a depth of 32 bit or
  91  * less into floats.
  92  *
  93  * \param format_desc  the format of the 'packed' incoming pixel vector
  94  * \param type  the desired type for rgba_out (type.length = n, above)
  95  * \param packed  the incoming vector of packed pixels
  96  * \param rgba_out  returns the SoA R,G,B,A vectors
  97  */
  98 void
  99 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 100                          const struct util_format_description *format_desc,
 101                          struct lp_type type,
 102                          LLVMValueRef packed,
 103                          LLVMValueRef rgba_out[4])
 104 {
 105    struct lp_build_context bld;
 106    LLVMValueRef inputs[4];
 107    unsigned start;
 108    unsigned chan;
 109
 110    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 111    assert(format_desc->block.width == 1);
 112    assert(format_desc->block.height == 1);
 113    assert(format_desc->block.bits <= type.width);
 114    /* FIXME: Support more output types */
 115    assert(type.floating);
 116    assert(type.width == 32);
 117
 118    lp_build_context_init(&bld, builder, type);
 119
 120    /* Decode the input vector components */
 121    start = 0;
 122    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
 123       const unsigned width = format_desc->channel[chan].size;
 124       const unsigned stop = start + width;
 125       LLVMValueRef input;
 126
 127       input = packed;
 128
 129       switch(format_desc->channel[chan].type) {
 130       case UTIL_FORMAT_TYPE_VOID:
 131          input = lp_build_undef(type);
 132          break;
 133
 134       case UTIL_FORMAT_TYPE_UNSIGNED:
 135          /*
 136           * Align the LSB
 137           */
 138
 139          if (start) {
 140             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
 141          }
 142
 143          /*
 144           * Zero the MSBs
 145           */
 146
 147          if (stop < format_desc->block.bits) {
 148             unsigned mask = ((unsigned long long)1 << width) - 1;
 149             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
 150          }
 151
 152          /*
 153           * Type conversion
 154           */
 155
 156          if (type.floating) {
 157             if(format_desc->channel[chan].normalized)
 158                input = lp_build_unsigned_norm_to_float(builder, width, type, input);
 159             else
 160                input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
 161          }
 162          else {
 163             /* FIXME */
 164             assert(0);
 165             input = lp_build_undef(type);
 166          }
 167
 168          break;
 169
 170       case UTIL_FORMAT_TYPE_SIGNED:
 171          /*
 172           * Align the sign bit first.
 173           */
 174
 175          if (stop < type.width) {
 176             unsigned bits = type.width - stop;
 177             LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
 178             input = LLVMBuildShl(builder, input, bits_val, "");
 179          }
 180
 181          /*
 182           * Align the LSB (with an arithmetic shift to preserve the sign)
 183           */
 184
 185          if (format_desc->channel[chan].size < type.width) {
 186             unsigned bits = type.width - format_desc->channel[chan].size;
 187             LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
 188             input = LLVMBuildAShr(builder, input, bits_val, "");
 189          }
 190
 191          /*
 192           * Type conversion
 193           */
 194
 195          if (type.floating) {
 196             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
 197             if (format_desc->channel[chan].normalized) {
 198                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
 199                LLVMValueRef scale_val = lp_build_const_vec(type, scale);
 200                input = LLVMBuildFMul(builder, input, scale_val, "");
 201             }
 202          }
 203          else {
 204             /* FIXME */
 205             assert(0);
 206             input = lp_build_undef(type);
 207          }
 208
 209          break;
 210
 211       case UTIL_FORMAT_TYPE_FLOAT:
 212          if (type.floating) {
 213             assert(start == 0);
 214             assert(stop == 32);
 215             assert(type.width == 32);
 216             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
 217          }
 218          else {
 219             /* FIXME */
 220             assert(0);
 221             input = lp_build_undef(type);
 222          }
 223          break;
 224
 225       case UTIL_FORMAT_TYPE_FIXED:
 226          if (type.floating) {
 227             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
 228             LLVMValueRef scale_val = lp_build_const_vec(type, scale);
 229             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
 230             input = LLVMBuildFMul(builder, input, scale_val, "");
 231          }
 232          else {
 233             /* FIXME */
 234             assert(0);
 235             input = lp_build_undef(type);
 236          }
 237          break;
 238
 239       default:
 240          assert(0);
 241          input = lp_build_undef(type);
 242          break;
 243       }
 244
 245       inputs[chan] = input;
 246
 247       start = stop;
 248    }
 249
 250    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 251 }
 252
 253
 254 void
 255 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
 256                           struct lp_type dst_type,
 257                           LLVMValueRef packed,
 258                           LLVMValueRef *rgba)
 259 {
 260    LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
 261    unsigned chan;
 262
 263    packed = LLVMBuildBitCast(builder, packed,
 264                              lp_build_int_vec_type(dst_type), "");
 265
 266    /* Decode the input vector components */
 267    for (chan = 0; chan < 4; ++chan) {
 268       unsigned start = chan*8;
 269       unsigned stop = start + 8;
 270       LLVMValueRef input;
 271
 272       input = packed;
 273
 274       if (start)
 275          input = LLVMBuildLShr(builder, input,
 276                                lp_build_const_int_vec(dst_type, start), "");
 277
 278       if (stop < 32)
 279          input = LLVMBuildAnd(builder, input, mask, "");
 280
 281       input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
 282
 283       rgba[chan] = input;
 284    }
 285 }
 286
 287
 288
 289 /**
 290  * Fetch a texels from a texture, returning them in SoA layout.
 291  *
 292  * \param type  the desired return type for 'rgba'.  The vector length
 293  *              is the number of texels to fetch
 294  *
 295  * \param base_ptr  points to start of the texture image block.  For non-
 296  *                  compressed formats, this simply points to the texel.
 297  *                  For compressed formats, it points to the start of the
 298  *                  compressed data block.
 299  *
 300  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 301  *              these will always be (0,0).  For compressed formats, i will
 302  *              be in [0, block_width-1] and j will be in [0, block_height-1].
 303  */
 304 void
 305 lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
 306                         const struct util_format_description *format_desc,
 307                         struct lp_type type,
 308                         LLVMValueRef base_ptr,
 309                         LLVMValueRef offset,
 310                         LLVMValueRef i,
 311                         LLVMValueRef j,
 312                         LLVMValueRef rgba_out[4])
 313 {
 314
 315    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 316        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 317         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 318        format_desc->block.width == 1 &&
 319        format_desc->block.height == 1 &&
 320        format_desc->block.bits <= type.width &&
 321        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
 322         format_desc->channel[0].size == 32))
 323    {
 324       /*
 325        * The packed pixel fits into an element of the destination format. Put
 326        * the packed pixels into a vector and extract each component for all
 327        * vector elements in parallel.
 328        */
 329
 330       LLVMValueRef packed;
 331
 332       /*
 333        * gather the texels from the texture
 334        * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
 335        */
 336       packed = lp_build_gather(builder,
 337                                type.length,
 338                                format_desc->block.bits,
 339                                type.width,
 340                                base_ptr, offset);
 341
 342       /*
 343        * convert texels to float rgba
 344        */
 345       lp_build_unpack_rgba_soa(builder,
 346                                format_desc,
 347                                type,
 348                                packed, rgba_out);
 349       return;
 350    }
 351
 352    /*
 353     * Try calling lp_build_fetch_rgba_aos for all pixels.
 354     */
 355
 356    if (util_format_fits_8unorm(format_desc) &&
 357        type.floating && type.width == 32 && type.length == 4) {
 358       struct lp_type tmp_type;
 359       LLVMValueRef tmp;
 360
 361       memset(&tmp_type, 0, sizeof tmp_type);
 362       tmp_type.width = 8;
 363       tmp_type.length = type.length * 4;
 364       tmp_type.norm = TRUE;
 365
 366       tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
 367                                     base_ptr, offset, i, j);
 368
 369       lp_build_rgba8_to_f32_soa(builder,
 370                                 type,
 371                                 tmp,
 372                                 rgba_out);
 373
 374       return;
 375    }
 376
 377    /*
 378     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
 379     *
 380     * This is not the most efficient way of fetching pixels, as we
 381     * miss some opportunities to do vectorization, but this is
 382     * convenient for formats or scenarios for which there was no
 383     * opportunity or incentive to optimize.
 384     */
 385
 386    {
 387       unsigned k, chan;
 388       struct lp_type tmp_type;
 389
 390       tmp_type = type;
 391       tmp_type.length = 4;
 392
 393       for (chan = 0; chan < 4; ++chan) {
 394          rgba_out[chan] = lp_build_undef(type);
 395       }
 396
 397       /* loop over number of pixels */
 398       for(k = 0; k < type.length; ++k) {
 399          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
 400          LLVMValueRef offset_elem;
 401          LLVMValueRef i_elem, j_elem;
 402          LLVMValueRef tmp;
 403
 404          offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
 405
 406          i_elem = LLVMBuildExtractElement(builder, i, index, "");
 407          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 408
 409          /* Get a single float[4]={R,G,B,A} pixel */
 410          tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
 411                                        base_ptr, offset_elem,
 412                                        i_elem, j_elem);
 413
 414          /*
 415           * Insert the AoS tmp value channels into the SoA result vectors at
 416           * position = 'index'.
 417           */
 418          for (chan = 0; chan < 4; ++chan) {
 419             LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
 420             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
 421             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
 422                                                     tmp_chan, index, "");
 423          }
 424       }
 425    }
 426 }