src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_string.h"
  40
  41 #include "lp_bld_arit.h"
  42 #include "lp_bld_init.h"
  43 #include "lp_bld_type.h"
  44 #include "lp_bld_flow.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_swizzle.h"
  48 #include "lp_bld_gather.h"
  49 #include "lp_bld_debug.h"
  50 #include "lp_bld_format.h"
  51
  52
  53 /**
  54  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  55  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  56  * too.
  57  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  58  */
  59 LLVMValueRef
  60 lp_build_format_swizzle_aos(const struct util_format_description *desc,
  61                             struct lp_build_context *bld,
  62                             LLVMValueRef unswizzled)
  63 {
  64    unsigned char swizzles[4];
  65    unsigned chan;
  66
  67    assert(bld->type.length % 4 == 0);
  68
  69    for (chan = 0; chan < 4; ++chan) {
  70       enum util_format_swizzle swizzle;
  71
  72       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  73          /*
  74           * For ZS formats do RGBA = ZZZ1
  75           */
  76          if (chan == 3) {
  77             swizzle = UTIL_FORMAT_SWIZZLE_1;
  78          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  79             swizzle = UTIL_FORMAT_SWIZZLE_0;
  80          } else {
  81             swizzle = desc->swizzle[0];
  82          }
  83       } else {
  84          swizzle = desc->swizzle[chan];
  85       }
  86       swizzles[chan] = swizzle;
  87    }
  88
  89    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  90 }
  91
  92
  93 /**
  94  * Whether the format matches the vector type, apart of swizzles.
  95  */
  96 static INLINE boolean
  97 format_matches_type(const struct util_format_description *desc,
  98                     struct lp_type type)
  99 {
 100    enum util_format_type chan_type;
 101    unsigned chan;
 102
 103    assert(type.length % 4 == 0);
 104
 105    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
 106        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
 107        desc->block.width != 1 ||
 108        desc->block.height != 1) {
 109       return FALSE;
 110    }
 111
 112    if (type.floating) {
 113       chan_type = UTIL_FORMAT_TYPE_FLOAT;
 114    } else if (type.fixed) {
 115       chan_type = UTIL_FORMAT_TYPE_FIXED;
 116    } else if (type.sign) {
 117       chan_type = UTIL_FORMAT_TYPE_SIGNED;
 118    } else {
 119       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
 120    }
 121
 122    for (chan = 0; chan < desc->nr_channels; ++chan) {
 123       if (desc->channel[chan].size != type.width) {
 124          return FALSE;
 125       }
 126
 127       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
 128          if (desc->channel[chan].type != chan_type ||
 129              desc->channel[chan].normalized != type.norm) {
 130             return FALSE;
 131          }
 132       }
 133    }
 134
 135    return TRUE;
 136 }
 137
 138
 139 /**
 140  * Unpack a single pixel into its RGBA components.
 141  *
 142  * @param desc  the pixel format for the packed pixel value
 143  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 144  *
 145  * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 146  */
 147 static INLINE LLVMValueRef
 148 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
 149                                const struct util_format_description *desc,
 150                                LLVMValueRef packed)
 151 {
 152    LLVMBuilderRef builder = gallivm->builder;
 153    LLVMValueRef shifted, casted, scaled, masked;
 154    LLVMValueRef shifts[4];
 155    LLVMValueRef masks[4];
 156    LLVMValueRef scales[4];
 157
 158    boolean normalized;
 159    boolean needs_uitofp;
 160    unsigned shift;
 161    unsigned i;
 162
 163    /* TODO: Support more formats */
 164    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 165    assert(desc->block.width == 1);
 166    assert(desc->block.height == 1);
 167    assert(desc->block.bits <= 32);
 168
 169    /* Do the intermediate integer computations with 32bit integers since it
 170     * matches floating point size */
 171    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
 172
 173    /* Broadcast the packed value to all four channels
 174     * before: packed = BGRA
 175     * after: packed = {BGRA, BGRA, BGRA, BGRA}
 176     */
 177    packed = LLVMBuildInsertElement(builder,
 178                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 179                                    packed,
 180                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
 181                                    "");
 182    packed = LLVMBuildShuffleVector(builder,
 183                                    packed,
 184                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 185                                    LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
 186                                    "");
 187
 188    /* Initialize vector constants */
 189    normalized = FALSE;
 190    needs_uitofp = FALSE;
 191    shift = 0;
 192
 193    /* Loop over 4 color components */
 194    for (i = 0; i < 4; ++i) {
 195       unsigned bits = desc->channel[i].size;
 196
 197       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 198          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 199          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
 200          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
 201       }
 202       else {
 203          unsigned long long mask = (1ULL << bits) - 1;
 204
 205          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 206
 207          if (bits == 32) {
 208             needs_uitofp = TRUE;
 209          }
 210
 211          shifts[i] = lp_build_const_int32(gallivm, shift);
 212          masks[i] = lp_build_const_int32(gallivm, mask);
 213
 214          if (desc->channel[i].normalized) {
 215             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
 216             normalized = TRUE;
 217          }
 218          else
 219             scales[i] =  lp_build_const_float(gallivm, 1.0);
 220       }
 221
 222       shift += bits;
 223    }
 224
 225    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
 226     * into masked = {B, G, R, A}
 227     */
 228    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 229    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 230
 231
 232    if (!needs_uitofp) {
 233       /* UIToFP can't be expressed in SSE2 */
 234       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
 235    } else {
 236       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
 237    }
 238
 239    /* At this point 'casted' may be a vector of floats such as
 240     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
 241     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
 242     */
 243
 244    if (normalized)
 245       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
 246    else
 247       scaled = casted;
 248
 249    return scaled;
 250 }
 251
 252
 253 /**
 254  * Pack a single pixel.
 255  *
 256  * @param rgba 4 float vector with the unpacked components.
 257  *
 258  * XXX: This is mostly for reference and testing -- operating a single pixel at
 259  * a time is rarely if ever needed.
 260  */
 261 LLVMValueRef
 262 lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
 263                        const struct util_format_description *desc,
 264                        LLVMValueRef rgba)
 265 {
 266    LLVMBuilderRef builder = gallivm->builder;
 267    LLVMTypeRef type;
 268    LLVMValueRef packed = NULL;
 269    LLVMValueRef swizzles[4];
 270    LLVMValueRef shifted, casted, scaled, unswizzled;
 271    LLVMValueRef shifts[4];
 272    LLVMValueRef scales[4];
 273    boolean normalized;
 274    unsigned shift;
 275    unsigned i, j;
 276
 277    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 278    assert(desc->block.width == 1);
 279    assert(desc->block.height == 1);
 280
 281    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
 282
 283    /* Unswizzle the color components into the source vector. */
 284    for (i = 0; i < 4; ++i) {
 285       for (j = 0; j < 4; ++j) {
 286          if (desc->swizzle[j] == i)
 287             break;
 288       }
 289       if (j < 4)
 290          swizzles[i] = lp_build_const_int32(gallivm, j);
 291       else
 292          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 293    }
 294
 295    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 296                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
 297                                        LLVMConstVector(swizzles, 4), "");
 298
 299    normalized = FALSE;
 300    shift = 0;
 301    for (i = 0; i < 4; ++i) {
 302       unsigned bits = desc->channel[i].size;
 303
 304       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 305          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 306          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
 307       }
 308       else {
 309          unsigned mask = (1 << bits) - 1;
 310
 311          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 312          assert(bits < 32);
 313
 314          shifts[i] = lp_build_const_int32(gallivm, shift);
 315
 316          if (desc->channel[i].normalized) {
 317             scales[i] = lp_build_const_float(gallivm, mask);
 318             normalized = TRUE;
 319          }
 320          else
 321             scales[i] = lp_build_const_float(gallivm, 1.0);
 322       }
 323
 324       shift += bits;
 325    }
 326
 327    if (normalized)
 328       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 329    else
 330       scaled = unswizzled;
 331
 332    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
 333
 334    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 335
 336    /* Bitwise or all components */
 337    for (i = 0; i < 4; ++i) {
 338       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 339          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
 340                                                lp_build_const_int32(gallivm, i), "");
 341          if (packed)
 342             packed = LLVMBuildOr(builder, packed, component, "");
 343          else
 344             packed = component;
 345       }
 346    }
 347
 348    if (!packed)
 349       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
 350
 351    if (desc->block.bits < 32)
 352       packed = LLVMBuildTrunc(builder, packed, type, "");
 353
 354    return packed;
 355 }
 356
 357
 358
 359
 360 /**
 361  * Fetch a pixel into a 4 float AoS.
 362  *
 363  * \param format_desc  describes format of the image we're fetching from
 364  * \param ptr  address of the pixel block (or the texel if uncompressed)
 365  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 366  *              these will always be (0, 0).
 367  * \return  a 4 element vector with the pixel's RGBA values.
 368  */
 369 LLVMValueRef
 370 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
 371                         const struct util_format_description *format_desc,
 372                         struct lp_type type,
 373                         LLVMValueRef base_ptr,
 374                         LLVMValueRef offset,
 375                         LLVMValueRef i,
 376                         LLVMValueRef j)
 377 {
 378    LLVMBuilderRef builder = gallivm->builder;
 379    unsigned num_pixels = type.length / 4;
 380    struct lp_build_context bld;
 381
 382    assert(type.length <= LP_MAX_VECTOR_LENGTH);
 383    assert(type.length % 4 == 0);
 384
 385    lp_build_context_init(&bld, gallivm, type);
 386
 387    /*
 388     * Trivial case
 389     *
 390     * The format matches the type (apart of a swizzle) so no need for
 391     * scaling or converting.
 392     */
 393
 394    if (format_matches_type(format_desc, type) &&
 395        format_desc->block.bits <= type.width * 4 &&
 396        util_is_power_of_two(format_desc->block.bits)) {
 397       LLVMValueRef packed;
 398
 399       /*
 400        * The format matches the type (apart of a swizzle) so no need for
 401        * scaling or converting.
 402        */
 403
 404       packed = lp_build_gather(gallivm, type.length/4,
 405                                format_desc->block.bits, type.width*4,
 406                                base_ptr, offset);
 407
 408       assert(format_desc->block.bits <= type.width * type.length);
 409
 410       packed = LLVMBuildBitCast(gallivm->builder, packed,
 411                                 lp_build_vec_type(gallivm, type), "");
 412
 413       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
 414    }
 415
 416    /*
 417     * Bit arithmetic
 418     */
 419
 420    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 421        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 422         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 423        format_desc->block.width == 1 &&
 424        format_desc->block.height == 1 &&
 425        util_is_power_of_two(format_desc->block.bits) &&
 426        format_desc->block.bits <= 32 &&
 427        format_desc->is_bitmask &&
 428        !format_desc->is_mixed &&
 429        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 430         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
 431
 432       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 433       LLVMValueRef res;
 434       unsigned k;
 435
 436       /*
 437        * Unpack a pixel at a time into a <4 x float> RGBA vector
 438        */
 439
 440       for (k = 0; k < num_pixels; ++k) {
 441          LLVMValueRef packed;
 442
 443          packed = lp_build_gather_elem(gallivm, num_pixels,
 444                                        format_desc->block.bits, 32,
 445                                        base_ptr, offset, k);
 446
 447          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
 448                                                   format_desc,
 449                                                   packed);
 450       }
 451
 452       /*
 453        * Type conversion.
 454        *
 455        * TODO: We could avoid floating conversion for integer to
 456        * integer conversions.
 457        */
 458
 459       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
 460          debug_printf("%s: unpacking %s with floating point\n",
 461                       __FUNCTION__, format_desc->short_name);
 462       }
 463
 464       lp_build_conv(gallivm,
 465                     lp_float32_vec4_type(),
 466                     type,
 467                     tmps, num_pixels, &res, 1);
 468
 469       return lp_build_format_swizzle_aos(format_desc, &bld, res);
 470    }
 471
 472    /*
 473     * YUV / subsampled formats
 474     */
 475
 476    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
 477       struct lp_type tmp_type;
 478       LLVMValueRef tmp;
 479
 480       memset(&tmp_type, 0, sizeof tmp_type);
 481       tmp_type.width = 8;
 482       tmp_type.length = num_pixels * 4;
 483       tmp_type.norm = TRUE;
 484
 485       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
 486                                                format_desc,
 487                                                num_pixels,
 488                                                base_ptr,
 489                                                offset,
 490                                                i, j);
 491
 492       lp_build_conv(gallivm,
 493                     tmp_type, type,
 494                     &tmp, 1, &tmp, 1);
 495
 496       return tmp;
 497    }
 498
 499    /*
 500     * Fallback to util_format_description::fetch_rgba_8unorm().
 501     */
 502
 503    if (format_desc->fetch_rgba_8unorm &&
 504        !type.floating && type.width == 8 && !type.sign && type.norm) {
 505       /*
 506        * Fallback to calling util_format_description::fetch_rgba_8unorm.
 507        *
 508        * This is definitely not the most efficient way of fetching pixels, as
 509        * we miss the opportunity to do vectorization, but this it is a
 510        * convenient for formats or scenarios for which there was no opportunity
 511        * or incentive to optimize.
 512        */
 513
 514       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
 515       char name[256];
 516       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
 517       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
 518       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
 519       LLVMValueRef function;
 520       LLVMValueRef tmp_ptr;
 521       LLVMValueRef tmp;
 522       LLVMValueRef res;
 523       unsigned k;
 524
 525       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
 526                     format_desc->short_name);
 527
 528       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 529          debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
 530       }
 531
 532       /*
 533        * Declare and bind format_desc->fetch_rgba_8unorm().
 534        */
 535
 536       function = LLVMGetNamedFunction(module, name);
 537       if (!function) {
 538          LLVMTypeRef ret_type;
 539          LLVMTypeRef arg_types[4];
 540          LLVMTypeRef function_type;
 541
 542          ret_type = LLVMVoidTypeInContext(gallivm->context);
 543          arg_types[0] = pi8t;
 544          arg_types[1] = pi8t;
 545          arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8);
 546          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 547          function = LLVMAddFunction(module, name, function_type);
 548
 549          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 550          LLVMSetLinkage(function, LLVMExternalLinkage);
 551
 552          assert(LLVMIsDeclaration(function));
 553
 554          LLVMAddGlobalMapping(gallivm->engine, function,
 555                               func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
 556       }
 557
 558       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
 559
 560       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
 561
 562       /*
 563        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
 564        * in the SoA vectors.
 565        */
 566
 567       for (k = 0; k < num_pixels; ++k) {
 568          LLVMValueRef index = lp_build_const_int32(gallivm, k);
 569          LLVMValueRef args[4];
 570
 571          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
 572          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
 573                                             base_ptr, offset, k);
 574
 575          if (num_pixels == 1) {
 576             args[2] = i;
 577             args[3] = j;
 578          }
 579          else {
 580             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 581             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 582          }
 583
 584          LLVMBuildCall(builder, function, args, Elements(args), "");
 585
 586          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
 587
 588          if (num_pixels == 1) {
 589             res = tmp;
 590          }
 591          else {
 592             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
 593          }
 594       }
 595
 596       /* Bitcast from <n x i32> to <4n x i8> */
 597       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
 598
 599       return res;
 600    }
 601
 602
 603    /*
 604     * Fallback to util_format_description::fetch_rgba_float().
 605     */
 606
 607    if (format_desc->fetch_rgba_float) {
 608       /*
 609        * Fallback to calling util_format_description::fetch_rgba_float.
 610        *
 611        * This is definitely not the most efficient way of fetching pixels, as
 612        * we miss the opportunity to do vectorization, but this it is a
 613        * convenient for formats or scenarios for which there was no opportunity
 614        * or incentive to optimize.
 615        */
 616
 617       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 618       char name[256];
 619       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
 620       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
 621       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
 622       LLVMValueRef function;
 623       LLVMValueRef tmp_ptr;
 624       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 625       LLVMValueRef res;
 626       unsigned k;
 627
 628       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
 629                     format_desc->short_name);
 630
 631       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
 632          debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
 633       }
 634
 635       /*
 636        * Declare and bind format_desc->fetch_rgba_float().
 637        */
 638
 639       function = LLVMGetNamedFunction(module, name);
 640       if (!function) {
 641          LLVMTypeRef ret_type;
 642          LLVMTypeRef arg_types[4];
 643          LLVMTypeRef function_type;
 644
 645          ret_type = LLVMVoidTypeInContext(gallivm->context);
 646          arg_types[0] = pf32t;
 647          arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
 648          arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8);
 649          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 650          function = LLVMAddFunction(module, name, function_type);
 651
 652          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 653          LLVMSetLinkage(function, LLVMExternalLinkage);
 654
 655          assert(LLVMIsDeclaration(function));
 656
 657          LLVMAddGlobalMapping(gallivm->engine, function,
 658                               func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
 659       }
 660
 661       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
 662
 663       /*
 664        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 665        * in the SoA vectors.
 666        */
 667
 668       for (k = 0; k < num_pixels; ++k) {
 669          LLVMValueRef args[4];
 670
 671          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
 672          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
 673                                             base_ptr, offset, k);
 674
 675          if (num_pixels == 1) {
 676             args[2] = i;
 677             args[3] = j;
 678          }
 679          else {
 680             LLVMValueRef index = lp_build_const_int32(gallivm, k);
 681             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 682             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 683          }
 684
 685          LLVMBuildCall(builder, function, args, Elements(args), "");
 686
 687          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
 688       }
 689
 690       lp_build_conv(gallivm,
 691                     lp_float32_vec4_type(),
 692                     type,
 693                     tmps, num_pixels, &res, 1);
 694
 695       return res;
 696    }
 697
 698    assert(0);
 699    return lp_build_undef(gallivm, type);
 700 }