src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_pointer.h"
  40 #include "util/u_string.h"
  41
  42 #include "lp_bld_arit.h"
  43 #include "lp_bld_init.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_flow.h"
  46 #include "lp_bld_const.h"
  47 #include "lp_bld_conv.h"
  48 #include "lp_bld_swizzle.h"
  49 #include "lp_bld_gather.h"
  50 #include "lp_bld_debug.h"
  51 #include "lp_bld_format.h"
  52 #include "lp_bld_intr.h"
  53
  54
  55 /**
  56  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  57  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  58  * too.
  59  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  60  */
  61 LLVMValueRef
  62 lp_build_format_swizzle_aos(const struct util_format_description *desc,
  63                             struct lp_build_context *bld,
  64                             LLVMValueRef unswizzled)
  65 {
  66    unsigned char swizzles[4];
  67    unsigned chan;
  68
  69    assert(bld->type.length % 4 == 0);
  70
  71    for (chan = 0; chan < 4; ++chan) {
  72       enum util_format_swizzle swizzle;
  73
  74       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  75          /*
  76           * For ZS formats do RGBA = ZZZ1
  77           */
  78          if (chan == 3) {
  79             swizzle = UTIL_FORMAT_SWIZZLE_1;
  80          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  81             swizzle = UTIL_FORMAT_SWIZZLE_0;
  82          } else {
  83             swizzle = desc->swizzle[0];
  84          }
  85       } else {
  86          swizzle = desc->swizzle[chan];
  87       }
  88       swizzles[chan] = swizzle;
  89    }
  90
  91    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  92 }
  93
  94
  95 /**
  96  * Whether the format matches the vector type, apart of swizzles.
  97  */
  98 static INLINE boolean
  99 format_matches_type(const struct util_format_description *desc,
 100                     struct lp_type type)
 101 {
 102    enum util_format_type chan_type;
 103    unsigned chan;
 104
 105    assert(type.length % 4 == 0);
 106
 107    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
 108        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
 109        desc->block.width != 1 ||
 110        desc->block.height != 1) {
 111       return FALSE;
 112    }
 113
 114    if (type.floating) {
 115       chan_type = UTIL_FORMAT_TYPE_FLOAT;
 116    } else if (type.fixed) {
 117       chan_type = UTIL_FORMAT_TYPE_FIXED;
 118    } else if (type.sign) {
 119       chan_type = UTIL_FORMAT_TYPE_SIGNED;
 120    } else {
 121       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
 122    }
 123
 124    for (chan = 0; chan < desc->nr_channels; ++chan) {
 125       if (desc->channel[chan].size != type.width) {
 126          return FALSE;
 127       }
 128
 129       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
 130          if (desc->channel[chan].type != chan_type ||
 131              desc->channel[chan].normalized != type.norm) {
 132             return FALSE;
 133          }
 134       }
 135    }
 136
 137    return TRUE;
 138 }
 139
 140
 141 /**
 142  * Unpack a single pixel into its RGBA components.
 143  *
 144  * @param desc  the pixel format for the packed pixel value
 145  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 146  *
 147  * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 148  */
 149 static INLINE LLVMValueRef
 150 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
 151                                const struct util_format_description *desc,
 152                                LLVMValueRef packed)
 153 {
 154    LLVMBuilderRef builder = gallivm->builder;
 155    LLVMValueRef shifted, casted, scaled, masked;
 156    LLVMValueRef shifts[4];
 157    LLVMValueRef masks[4];
 158    LLVMValueRef scales[4];
 159
 160    boolean normalized;
 161    boolean needs_uitofp;
 162    unsigned shift;
 163    unsigned i;
 164
 165    /* TODO: Support more formats */
 166    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 167    assert(desc->block.width == 1);
 168    assert(desc->block.height == 1);
 169    assert(desc->block.bits <= 32);
 170
 171    /* Do the intermediate integer computations with 32bit integers since it
 172     * matches floating point size */
 173    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
 174
 175 #ifdef PIPE_ARCH_BIG_ENDIAN
 176    packed = lp_build_bswap(gallivm, packed, lp_type_uint(32));
 177 #endif
 178
 179    /* Broadcast the packed value to all four channels
 180     * before: packed = BGRA
 181     * after: packed = {BGRA, BGRA, BGRA, BGRA}
 182     */
 183    packed = LLVMBuildInsertElement(builder,
 184                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 185                                    packed,
 186                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
 187                                    "");
 188    packed = LLVMBuildShuffleVector(builder,
 189                                    packed,
 190                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 191                                    LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 192                                    "");
 193
 194    /* Initialize vector constants */
 195    normalized = FALSE;
 196    needs_uitofp = FALSE;
 197    shift = 0;
 198
 199    /* Loop over 4 color components */
 200    for (i = 0; i < 4; ++i) {
 201       unsigned bits = desc->channel[i].size;
 202
 203       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 204          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 205          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
 206          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
 207       }
 208       else {
 209          unsigned long long mask = (1ULL << bits) - 1;
 210
 211          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 212
 213          if (bits == 32) {
 214             needs_uitofp = TRUE;
 215          }
 216
 217          shifts[i] = lp_build_const_int32(gallivm, shift);
 218          masks[i] = lp_build_const_int32(gallivm, mask);
 219
 220          if (desc->channel[i].normalized) {
 221             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
 222             normalized = TRUE;
 223          }
 224          else
 225             scales[i] =  lp_build_const_float(gallivm, 1.0);
 226       }
 227
 228       shift += bits;
 229    }
 230
 231    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
 232     * into masked = {B, G, R, A}
 233     */
 234    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 235    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 236
 237    if (!needs_uitofp) {
 238       /* UIToFP can't be expressed in SSE2 */
 239       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
 240    } else {
 241       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
 242    }
 243
 244    /* At this point 'casted' may be a vector of floats such as
 245     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
 246     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
 247     */
 248
 249    if (normalized)
 250       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
 251    else
 252       scaled = casted;
 253
 254    return scaled;
 255 }
 256
 257
 258 /**
 259  * Pack a single pixel.
 260  *
 261  * @param rgba 4 float vector with the unpacked components.
 262  *
 263  * XXX: This is mostly for reference and testing -- operating a single pixel at
 264  * a time is rarely if ever needed.
 265  */
 266 LLVMValueRef
 267 lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
 268                        const struct util_format_description *desc,
 269                        LLVMValueRef rgba)
 270 {
 271    LLVMBuilderRef builder = gallivm->builder;
 272    LLVMTypeRef type;
 273    LLVMValueRef packed = NULL;
 274    LLVMValueRef swizzles[4];
 275    LLVMValueRef shifted, casted, scaled, unswizzled;
 276    LLVMValueRef shifts[4];
 277    LLVMValueRef scales[4];
 278    boolean normalized;
 279    unsigned shift;
 280    unsigned i, j;
 281
 282    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 283    assert(desc->block.width == 1);
 284    assert(desc->block.height == 1);
 285
 286    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
 287
 288    /* Unswizzle the color components into the source vector. */
 289    for (i = 0; i < 4; ++i) {
 290       for (j = 0; j < 4; ++j) {
 291          if (desc->swizzle[j] == i)
 292             break;
 293       }
 294       if (j < 4)
 295          swizzles[i] = lp_build_const_int32(gallivm, j);
 296       else
 297          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 298    }
 299
 300    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 301                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
 302                                        LLVMConstVector(swizzles, 4), "");
 303
 304    normalized = FALSE;
 305    shift = 0;
 306    for (i = 0; i < 4; ++i) {
 307       unsigned bits = desc->channel[i].size;
 308
 309       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 310          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 311          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
 312       }
 313       else {
 314          unsigned mask = (1 << bits) - 1;
 315
 316          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 317          assert(bits < 32);
 318
 319          shifts[i] = lp_build_const_int32(gallivm, shift);
 320
 321          if (desc->channel[i].normalized) {
 322             scales[i] = lp_build_const_float(gallivm, mask);
 323             normalized = TRUE;
 324          }
 325          else
 326             scales[i] = lp_build_const_float(gallivm, 1.0);
 327       }
 328
 329       shift += bits;
 330    }
 331
 332    if (normalized)
 333       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 334    else
 335       scaled = unswizzled;
 336
 337    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
 338
 339    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 340
 341    /* Bitwise or all components */
 342    for (i = 0; i < 4; ++i) {
 343       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 344          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
 345                                                lp_build_const_int32(gallivm, i), "");
 346          if (packed)
 347             packed = LLVMBuildOr(builder, packed, component, "");
 348          else
 349             packed = component;
 350       }
 351    }
 352
 353    if (!packed)
 354       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 355
 356    if (desc->block.bits < 32)
 357       packed = LLVMBuildTrunc(builder, packed, type, "");
 358
 359    return packed;
 360 }
 361
 362
 363
 364
 365 /**
 366  * Fetch a pixel into a 4 float AoS.
 367  *
 368  * \param format_desc  describes format of the image we're fetching from
 369  * \param ptr  address of the pixel block (or the texel if uncompressed)
 370  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 371  *              these will always be (0, 0).
 372  * \return  a 4 element vector with the pixel's RGBA values.
 373  */
 374 LLVMValueRef
 375 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
 376                         const struct util_format_description *format_desc,
 377                         struct lp_type type,
 378                         LLVMValueRef base_ptr,
 379                         LLVMValueRef offset,
 380                         LLVMValueRef i,
 381                         LLVMValueRef j)
 382 {
 383    LLVMBuilderRef builder = gallivm->builder;
 384    unsigned num_pixels = type.length / 4;
 385    struct lp_build_context bld;
 386
 387    assert(type.length <= LP_MAX_VECTOR_LENGTH);
 388    assert(type.length % 4 == 0);
 389
 390    lp_build_context_init(&bld, gallivm, type);
 391
 392    /*
 393     * Trivial case
 394     *
 395     * The format matches the type (apart of a swizzle) so no need for
 396     * scaling or converting.
 397     */
 398
 399    if (format_matches_type(format_desc, type) &&
 400        format_desc->block.bits <= type.width * 4 &&
 401        util_is_power_of_two(format_desc->block.bits)) {
 402       LLVMValueRef packed;
 403       LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
 404       unsigned vec_len = type.width * type.length;
 405
 406       /*
 407        * The format matches the type (apart of a swizzle) so no need for
 408        * scaling or converting.
 409        */
 410
 411       packed = lp_build_gather(gallivm, type.length/4,
 412                                format_desc->block.bits, type.width*4,
 413                                base_ptr, offset);
 414
 415       assert(format_desc->block.bits <= vec_len);
 416
 417       packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
 418 #ifdef PIPE_ARCH_BIG_ENDIAN
 419       if (type.floating)
 420          packed = lp_build_bswap_vec(gallivm, packed, type,
 421                                     lp_type_float_vec(type.width, vec_len));
 422 #endif
 423       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
 424    }
 425
 426    /*
 427     * Bit arithmetic
 428     */
 429
 430    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 431        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 432         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 433        format_desc->block.width == 1 &&
 434        format_desc->block.height == 1 &&
 435        util_is_power_of_two(format_desc->block.bits) &&
 436        format_desc->block.bits <= 32 &&
 437        format_desc->is_bitmask &&
 438        !format_desc->is_mixed &&
 439        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 440         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
 441        !format_desc->channel[0].pure_integer) {
 442
 443       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 444       LLVMValueRef res;
 445       unsigned k;
 446
 447       /*
 448        * Unpack a pixel at a time into a <4 x float> RGBA vector
 449        */
 450
 451       for (k = 0; k < num_pixels; ++k) {
 452          LLVMValueRef packed;
 453
 454          packed = lp_build_gather_elem(gallivm, num_pixels,
 455                                        format_desc->block.bits, 32,
 456                                        base_ptr, offset, k);
 457
 458          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
 459                                                   format_desc,
 460                                                   packed);
 461       }
 462
 463       /*
 464        * Type conversion.
 465        *
 466        * TODO: We could avoid floating conversion for integer to
 467        * integer conversions.
 468        */
 469
 470       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
 471          debug_printf("%s: unpacking %s with floating point\n",
 472                       __FUNCTION__, format_desc->short_name);
 473       }
 474
 475       lp_build_conv(gallivm,
 476                     lp_float32_vec4_type(),
 477                     type,
 478                     tmps, num_pixels, &res, 1);
 479
 480       return lp_build_format_swizzle_aos(format_desc, &bld, res);
 481    }
 482
 483    /* If all channels are of same type and we are not using half-floats */
 484    if (format_desc->is_array &&
 485        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
 486       assert(!format_desc->is_mixed);
 487       return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
 488    }
 489
 490    /*
 491     * YUV / subsampled formats
 492     */
 493
 494    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
 495       struct lp_type tmp_type;
 496       LLVMValueRef tmp;
 497
 498       memset(&tmp_type, 0, sizeof tmp_type);
 499       tmp_type.width = 8;
 500       tmp_type.length = num_pixels * 4;
 501       tmp_type.norm = TRUE;
 502
 503       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
 504                                                format_desc,
 505                                                num_pixels,
 506                                                base_ptr,
 507                                                offset,
 508                                                i, j);
 509
 510       lp_build_conv(gallivm,
 511                     tmp_type, type,
 512                     &tmp, 1, &tmp, 1);
 513
 514       return tmp;
 515    }
 516
 517    /*
 518     * Fallback to util_format_description::fetch_rgba_8unorm().
 519     */
 520
 521    if (format_desc->fetch_rgba_8unorm &&
 522        !type.floating && type.width == 8 && !type.sign && type.norm) {
 523       /*
 524        * Fallback to calling util_format_description::fetch_rgba_8unorm.
 525        *
 526        * This is definitely not the most efficient way of fetching pixels, as
 527        * we miss the opportunity to do vectorization, but this it is a
 528        * convenient for formats or scenarios for which there was no opportunity
 529        * or incentive to optimize.
 530        */
 531
 532       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
 533       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
 534       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 535       LLVMValueRef function;
 536       LLVMValueRef tmp_ptr;
 537       LLVMValueRef tmp;
 538       LLVMValueRef res;
 539       unsigned k;
 540
 541       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 542          debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
 543                       __FUNCTION__, format_desc->short_name);
 544       }
 545
 546       /*
 547        * Declare and bind format_desc->fetch_rgba_8unorm().
 548        */
 549
 550       {
 551          /*
 552           * Function to call looks like:
 553           *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
 554           */
 555          LLVMTypeRef ret_type;
 556          LLVMTypeRef arg_types[4];
 557          LLVMTypeRef function_type;
 558
 559          ret_type = LLVMVoidTypeInContext(gallivm->context);
 560          arg_types[0] = pi8t;
 561          arg_types[1] = pi8t;
 562          arg_types[2] = i32t;
 563          arg_types[3] = i32t;
 564          function_type = LLVMFunctionType(ret_type, arg_types,
 565                                           Elements(arg_types), 0);
 566
 567          /* make const pointer for the C fetch_rgba_8unorm function */
 568          function = lp_build_const_int_pointer(gallivm,
 569             func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
 570
 571          /* cast the callee pointer to the function's type */
 572          function = LLVMBuildBitCast(builder, function,
 573                                      LLVMPointerType(function_type, 0),
 574                                      "cast callee");
 575       }
 576
 577       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
 578
 579       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
 580
 581       /*
 582        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
 583        * in the SoA vectors.
 584        */
 585
 586       for (k = 0; k < num_pixels; ++k) {
 587          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 588          LLVMValueRef args[4];
 589
 590          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
 591          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
 592                                             base_ptr, offset, k);
 593
 594          if (num_pixels == 1) {
 595             args[2] = i;
 596             args[3] = j;
 597          }
 598          else {
 599             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 600             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 601          }
 602
 603          LLVMBuildCall(builder, function, args, Elements(args), "");
 604
 605          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
 606
 607          if (num_pixels == 1) {
 608             res = tmp;
 609          }
 610          else {
 611             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
 612          }
 613       }
 614
 615       /* Bitcast from <n x i32> to <4n x i8> */
 616       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
 617
 618       return res;
 619    }
 620
 621    /*
 622     * Fallback to util_format_description::fetch_rgba_float().
 623     */
 624
 625    if (format_desc->fetch_rgba_float) {
 626       /*
 627        * Fallback to calling util_format_description::fetch_rgba_float.
 628        *
 629        * This is definitely not the most efficient way of fetching pixels, as
 630        * we miss the opportunity to do vectorization, but this it is a
 631        * convenient for formats or scenarios for which there was no opportunity
 632        * or incentive to optimize.
 633        */
 634
 635       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
 636       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
 637       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
 638       LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
 639       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 640       LLVMValueRef function;
 641       LLVMValueRef tmp_ptr;
 642       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 643       LLVMValueRef res;
 644       unsigned k;
 645
 646       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 647          debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
 648                       __FUNCTION__, format_desc->short_name);
 649       }
 650
 651       /*
 652        * Declare and bind format_desc->fetch_rgba_float().
 653        */
 654
 655       {
 656          /*
 657           * Function to call looks like:
 658           *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
 659           */
 660          LLVMTypeRef ret_type;
 661          LLVMTypeRef arg_types[4];
 662
 663          ret_type = LLVMVoidTypeInContext(gallivm->context);
 664          arg_types[0] = pf32t;
 665          arg_types[1] = pi8t;
 666          arg_types[2] = i32t;
 667          arg_types[3] = i32t;
 668
 669          function = lp_build_const_func_pointer(gallivm,
 670                                                 func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
 671                                                 ret_type,
 672                                                 arg_types, Elements(arg_types),
 673                                                 format_desc->short_name);
 674       }
 675
 676       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
 677
 678       /*
 679        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 680        * in the SoA vectors.
 681        */
 682
 683       for (k = 0; k < num_pixels; ++k) {
 684          LLVMValueRef args[4];
 685
 686          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
 687          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
 688                                             base_ptr, offset, k);
 689
 690          if (num_pixels == 1) {
 691             args[2] = i;
 692             args[3] = j;
 693          }
 694          else {
 695             LLVMValueRef index = lp_build_const_int32(gallivm, k);
 696             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 697             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 698          }
 699
 700          LLVMBuildCall(builder, function, args, Elements(args), "");
 701
 702          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
 703       }
 704
 705       lp_build_conv(gallivm,
 706                     lp_float32_vec4_type(),
 707                     type,
 708                     tmps, num_pixels, &res, 1);
 709
 710       return res;
 711    }
 712
 713    assert(!util_format_is_pure_integer(format_desc->format));
 714
 715    assert(0);
 716    return lp_build_undef(gallivm, type);
 717 }