src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * AoS pixel format manipulation.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  */
  34
  35
  36 #include "util/u_format.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_math.h"
  39 #include "util/u_string.h"
  40
  41 #include "lp_bld_arit.h"
  42 #include "lp_bld_init.h"
  43 #include "lp_bld_type.h"
  44 #include "lp_bld_flow.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_swizzle.h"
  48 #include "lp_bld_gather.h"
  49 #include "lp_bld_format.h"
  50
  51
  52 /**
  53  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  54  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  55  * too.
  56  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  57  */
  58 LLVMValueRef
  59 lp_build_format_swizzle_aos(const struct util_format_description *desc,
  60                             struct lp_build_context *bld,
  61                             LLVMValueRef unswizzled)
  62 {
  63    unsigned char swizzles[4];
  64    unsigned chan;
  65
  66    assert(bld->type.length % 4 == 0);
  67
  68    for (chan = 0; chan < 4; ++chan) {
  69       enum util_format_swizzle swizzle;
  70
  71       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  72          /*
  73           * For ZS formats do RGBA = ZZZ1
  74           */
  75          if (chan == 3) {
  76             swizzle = UTIL_FORMAT_SWIZZLE_1;
  77          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  78             swizzle = UTIL_FORMAT_SWIZZLE_0;
  79          } else {
  80             swizzle = desc->swizzle[0];
  81          }
  82       } else {
  83          swizzle = desc->swizzle[chan];
  84       }
  85       swizzles[chan] = swizzle;
  86    }
  87
  88    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  89 }
  90
  91
  92 /**
  93  * Whether the format matches the vector type, apart of swizzles.
  94  */
  95 static INLINE boolean
  96 format_matches_type(const struct util_format_description *desc,
  97                     struct lp_type type)
  98 {
  99    enum util_format_type chan_type;
 100    unsigned chan;
 101
 102    assert(type.length % 4 == 0);
 103
 104    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
 105        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
 106        desc->block.width != 1 ||
 107        desc->block.height != 1) {
 108       return FALSE;
 109    }
 110
 111    if (type.floating) {
 112       chan_type = UTIL_FORMAT_TYPE_FLOAT;
 113    } else if (type.fixed) {
 114       chan_type = UTIL_FORMAT_TYPE_FIXED;
 115    } else if (type.sign) {
 116       chan_type = UTIL_FORMAT_TYPE_SIGNED;
 117    } else {
 118       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
 119    }
 120
 121    for (chan = 0; chan < desc->nr_channels; ++chan) {
 122       if (desc->channel[chan].size != type.width) {
 123          return FALSE;
 124       }
 125
 126       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
 127          if (desc->channel[chan].type != chan_type ||
 128              desc->channel[chan].normalized != type.norm) {
 129             return FALSE;
 130          }
 131       }
 132    }
 133
 134    return TRUE;
 135 }
 136
 137
 138 /**
 139  * Unpack a single pixel into its RGBA components.
 140  *
 141  * @param desc  the pixel format for the packed pixel value
 142  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 143  *
 144  * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 145  */
 146 static INLINE LLVMValueRef
 147 lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
 148                                const struct util_format_description *desc,
 149                                LLVMValueRef packed)
 150 {
 151    LLVMValueRef shifted, casted, scaled, masked;
 152    LLVMValueRef shifts[4];
 153    LLVMValueRef masks[4];
 154    LLVMValueRef scales[4];
 155
 156    boolean normalized;
 157    boolean needs_uitofp;
 158    unsigned shift;
 159    unsigned i;
 160
 161    /* TODO: Support more formats */
 162    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 163    assert(desc->block.width == 1);
 164    assert(desc->block.height == 1);
 165    assert(desc->block.bits <= 32);
 166
 167    /* Do the intermediate integer computations with 32bit integers since it
 168     * matches floating point size */
 169    assert (LLVMTypeOf(packed) == LLVMInt32Type());
 170
 171    /* Broadcast the packed value to all four channels
 172     * before: packed = BGRA
 173     * after: packed = {BGRA, BGRA, BGRA, BGRA}
 174     */
 175    packed = LLVMBuildInsertElement(builder,
 176                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
 177                                    packed,
 178                                    LLVMConstNull(LLVMInt32Type()),
 179                                    "");
 180    packed = LLVMBuildShuffleVector(builder,
 181                                    packed,
 182                                    LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
 183                                    LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
 184                                    "");
 185
 186    /* Initialize vector constants */
 187    normalized = FALSE;
 188    needs_uitofp = FALSE;
 189    shift = 0;
 190
 191    /* Loop over 4 color components */
 192    for (i = 0; i < 4; ++i) {
 193       unsigned bits = desc->channel[i].size;
 194
 195       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 196          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 197          masks[i] = LLVMConstNull(LLVMInt32Type());
 198          scales[i] =  LLVMConstNull(LLVMFloatType());
 199       }
 200       else {
 201          unsigned long long mask = (1ULL << bits) - 1;
 202
 203          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 204
 205          if (bits == 32) {
 206             needs_uitofp = TRUE;
 207          }
 208
 209          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 210          masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
 211
 212          if (desc->channel[i].normalized) {
 213             scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
 214             normalized = TRUE;
 215          }
 216          else
 217             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 218       }
 219
 220       shift += bits;
 221    }
 222
 223    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
 224     * into masked = {B, G, R, A}
 225     */
 226    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
 227    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
 228
 229
 230    if (!needs_uitofp) {
 231       /* UIToFP can't be expressed in SSE2 */
 232       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 233    } else {
 234       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 235    }
 236
 237    /* At this point 'casted' may be a vector of floats such as
 238     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
 239     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
 240     */
 241
 242    if (normalized)
 243       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
 244    else
 245       scaled = casted;
 246
 247    return scaled;
 248 }
 249
 250
 251 /**
 252  * Pack a single pixel.
 253  *
 254  * @param rgba 4 float vector with the unpacked components.
 255  *
 256  * XXX: This is mostly for reference and testing -- operating a single pixel at
 257  * a time is rarely if ever needed.
 258  */
 259 LLVMValueRef
 260 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 261                        const struct util_format_description *desc,
 262                        LLVMValueRef rgba)
 263 {
 264    LLVMTypeRef type;
 265    LLVMValueRef packed = NULL;
 266    LLVMValueRef swizzles[4];
 267    LLVMValueRef shifted, casted, scaled, unswizzled;
 268    LLVMValueRef shifts[4];
 269    LLVMValueRef scales[4];
 270    boolean normalized;
 271    unsigned shift;
 272    unsigned i, j;
 273
 274    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
 275    assert(desc->block.width == 1);
 276    assert(desc->block.height == 1);
 277
 278    type = LLVMIntType(desc->block.bits);
 279
 280    /* Unswizzle the color components into the source vector. */
 281    for (i = 0; i < 4; ++i) {
 282       for (j = 0; j < 4; ++j) {
 283          if (desc->swizzle[j] == i)
 284             break;
 285       }
 286       if (j < 4)
 287          swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
 288       else
 289          swizzles[i] = LLVMGetUndef(LLVMInt32Type());
 290    }
 291
 292    unswizzled = LLVMBuildShuffleVector(builder, rgba,
 293                                        LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
 294                                        LLVMConstVector(swizzles, 4), "");
 295
 296    normalized = FALSE;
 297    shift = 0;
 298    for (i = 0; i < 4; ++i) {
 299       unsigned bits = desc->channel[i].size;
 300
 301       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
 302          shifts[i] = LLVMGetUndef(LLVMInt32Type());
 303          scales[i] =  LLVMGetUndef(LLVMFloatType());
 304       }
 305       else {
 306          unsigned mask = (1 << bits) - 1;
 307
 308          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
 309          assert(bits < 32);
 310
 311          shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
 312
 313          if (desc->channel[i].normalized) {
 314             scales[i] = LLVMConstReal(LLVMFloatType(), mask);
 315             normalized = TRUE;
 316          }
 317          else
 318             scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
 319       }
 320
 321       shift += bits;
 322    }
 323
 324    if (normalized)
 325       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
 326    else
 327       scaled = unswizzled;
 328
 329    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
 330
 331    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
 332
 333    /* Bitwise or all components */
 334    for (i = 0; i < 4; ++i) {
 335       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
 336          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
 337          if (packed)
 338             packed = LLVMBuildOr(builder, packed, component, "");
 339          else
 340             packed = component;
 341       }
 342    }
 343
 344    if (!packed)
 345       packed = LLVMGetUndef(LLVMInt32Type());
 346
 347    if (desc->block.bits < 32)
 348       packed = LLVMBuildTrunc(builder, packed, type, "");
 349
 350    return packed;
 351 }
 352
 353
 354
 355
 356 /**
 357  * Fetch a pixel into a 4 float AoS.
 358  *
 359  * \param format_desc  describes format of the image we're fetching from
 360  * \param ptr  address of the pixel block (or the texel if uncompressed)
 361  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 362  *              these will always be (0, 0).
 363  * \return  a 4 element vector with the pixel's RGBA values.
 364  */
 365 LLVMValueRef
 366 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 367                         const struct util_format_description *format_desc,
 368                         struct lp_type type,
 369                         LLVMValueRef base_ptr,
 370                         LLVMValueRef offset,
 371                         LLVMValueRef i,
 372                         LLVMValueRef j)
 373 {
 374    unsigned num_pixels = type.length / 4;
 375    struct lp_build_context bld;
 376
 377    assert(type.length <= LP_MAX_VECTOR_LENGTH);
 378    assert(type.length % 4 == 0);
 379
 380    lp_build_context_init(&bld, builder, type);
 381
 382    /*
 383     * Trivial case
 384     *
 385     * The format matches the type (apart of a swizzle) so no need for
 386     * scaling or converting.
 387     */
 388
 389    if (format_matches_type(format_desc, type) &&
 390        format_desc->block.bits <= type.width * 4 &&
 391        util_is_power_of_two(format_desc->block.bits)) {
 392       LLVMValueRef packed;
 393
 394       /*
 395        * The format matches the type (apart of a swizzle) so no need for
 396        * scaling or converting.
 397        */
 398
 399       packed = lp_build_gather(builder, type.length/4,
 400                                format_desc->block.bits, type.width*4,
 401                                base_ptr, offset);
 402
 403       assert(format_desc->block.bits <= type.width * type.length);
 404
 405       packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
 406
 407       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
 408    }
 409
 410    /*
 411     * Bit arithmetic
 412     */
 413
 414    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
 415        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
 416         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
 417        format_desc->block.width == 1 &&
 418        format_desc->block.height == 1 &&
 419        util_is_power_of_two(format_desc->block.bits) &&
 420        format_desc->block.bits <= 32 &&
 421        format_desc->is_bitmask &&
 422        !format_desc->is_mixed &&
 423        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
 424         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
 425
 426       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 427       LLVMValueRef res;
 428       unsigned k;
 429
 430       /*
 431        * Unpack a pixel at a time into a <4 x float> RGBA vector
 432        */
 433
 434       for (k = 0; k < num_pixels; ++k) {
 435          LLVMValueRef packed;
 436
 437          packed = lp_build_gather_elem(builder, num_pixels,
 438                                        format_desc->block.bits, 32,
 439                                        base_ptr, offset, k);
 440
 441          tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc,
 442                                                   packed);
 443       }
 444
 445       /*
 446        * Type conversion.
 447        *
 448        * TODO: We could avoid floating conversion for integer to
 449        * integer conversions.
 450        */
 451
 452       lp_build_conv(builder,
 453                     lp_float32_vec4_type(),
 454                     type,
 455                     tmps, num_pixels, &res, 1);
 456
 457       return lp_build_format_swizzle_aos(format_desc, &bld, res);
 458    }
 459
 460    /*
 461     * YUV / subsampled formats
 462     */
 463
 464    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
 465       struct lp_type tmp_type;
 466       LLVMValueRef tmp;
 467
 468       memset(&tmp_type, 0, sizeof tmp_type);
 469       tmp_type.width = 8;
 470       tmp_type.length = num_pixels * 4;
 471       tmp_type.norm = TRUE;
 472
 473       tmp = lp_build_fetch_subsampled_rgba_aos(builder,
 474                                                format_desc,
 475                                                num_pixels,
 476                                                base_ptr,
 477                                                offset,
 478                                                i, j);
 479
 480       lp_build_conv(builder,
 481                     tmp_type, type,
 482                     &tmp, 1, &tmp, 1);
 483
 484       return tmp;
 485    }
 486
 487    /*
 488     * Fallback to util_format_description::fetch_rgba_8unorm().
 489     */
 490
 491    if (format_desc->fetch_rgba_8unorm &&
 492        !type.floating && type.width == 8 && !type.sign && type.norm) {
 493       /*
 494        * Fallback to calling util_format_description::fetch_rgba_8unorm.
 495        *
 496        * This is definitely not the most efficient way of fetching pixels, as
 497        * we miss the opportunity to do vectorization, but this it is a
 498        * convenient for formats or scenarios for which there was no opportunity
 499        * or incentive to optimize.
 500        */
 501
 502       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 503       char name[256];
 504       LLVMTypeRef i8t = LLVMInt8Type();
 505       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
 506       LLVMTypeRef i32t = LLVMInt32Type();
 507       LLVMValueRef function;
 508       LLVMValueRef tmp_ptr;
 509       LLVMValueRef tmp;
 510       LLVMValueRef res;
 511       unsigned k;
 512
 513       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
 514                     format_desc->short_name);
 515
 516       /*
 517        * Declare and bind format_desc->fetch_rgba_8unorm().
 518        */
 519
 520       function = LLVMGetNamedFunction(module, name);
 521       if (!function) {
 522          LLVMTypeRef ret_type;
 523          LLVMTypeRef arg_types[4];
 524          LLVMTypeRef function_type;
 525
 526          ret_type = LLVMVoidType();
 527          arg_types[0] = pi8t;
 528          arg_types[1] = pi8t;
 529          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
 530          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 531          function = LLVMAddFunction(module, name, function_type);
 532
 533          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 534          LLVMSetLinkage(function, LLVMExternalLinkage);
 535
 536          assert(LLVMIsDeclaration(function));
 537
 538          LLVMAddGlobalMapping(lp_build_engine, function,
 539                               func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
 540       }
 541
 542       tmp_ptr = lp_build_alloca(builder, i32t, "");
 543
 544       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
 545
 546       /*
 547        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
 548        * in the SoA vectors.
 549        */
 550
 551       for (k = 0; k < num_pixels; ++k) {
 552          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
 553          LLVMValueRef args[4];
 554
 555          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
 556          args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
 557                                             base_ptr, offset, k);
 558
 559          if (num_pixels == 1) {
 560             args[2] = i;
 561             args[3] = j;
 562          }
 563          else {
 564             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 565             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 566          }
 567
 568          LLVMBuildCall(builder, function, args, Elements(args), "");
 569
 570          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
 571
 572          if (num_pixels == 1) {
 573             res = tmp;
 574          }
 575          else {
 576             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
 577          }
 578       }
 579
 580       /* Bitcast from <n x i32> to <4n x i8> */
 581       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
 582
 583       return res;
 584    }
 585
 586
 587    /*
 588     * Fallback to util_format_description::fetch_rgba_float().
 589     */
 590
 591    if (format_desc->fetch_rgba_float) {
 592       /*
 593        * Fallback to calling util_format_description::fetch_rgba_float.
 594        *
 595        * This is definitely not the most efficient way of fetching pixels, as
 596        * we miss the opportunity to do vectorization, but this it is a
 597        * convenient for formats or scenarios for which there was no opportunity
 598        * or incentive to optimize.
 599        */
 600
 601       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
 602       char name[256];
 603       LLVMTypeRef f32t = LLVMFloatType();
 604       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
 605       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
 606       LLVMValueRef function;
 607       LLVMValueRef tmp_ptr;
 608       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
 609       LLVMValueRef res;
 610       unsigned k;
 611
 612       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
 613                     format_desc->short_name);
 614
 615       /*
 616        * Declare and bind format_desc->fetch_rgba_float().
 617        */
 618
 619       function = LLVMGetNamedFunction(module, name);
 620       if (!function) {
 621          LLVMTypeRef ret_type;
 622          LLVMTypeRef arg_types[4];
 623          LLVMTypeRef function_type;
 624
 625          ret_type = LLVMVoidType();
 626          arg_types[0] = pf32t;
 627          arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
 628          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
 629          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
 630          function = LLVMAddFunction(module, name, function_type);
 631
 632          LLVMSetFunctionCallConv(function, LLVMCCallConv);
 633          LLVMSetLinkage(function, LLVMExternalLinkage);
 634
 635          assert(LLVMIsDeclaration(function));
 636
 637          LLVMAddGlobalMapping(lp_build_engine, function,
 638                               func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
 639       }
 640
 641       tmp_ptr = lp_build_alloca(builder, f32x4t, "");
 642
 643       /*
 644        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
 645        * in the SoA vectors.
 646        */
 647
 648       for (k = 0; k < num_pixels; ++k) {
 649          LLVMValueRef args[4];
 650
 651          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
 652          args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
 653                                             base_ptr, offset, k);
 654
 655          if (num_pixels == 1) {
 656             args[2] = i;
 657             args[3] = j;
 658          }
 659          else {
 660             LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
 661             args[2] = LLVMBuildExtractElement(builder, i, index, "");
 662             args[3] = LLVMBuildExtractElement(builder, j, index, "");
 663          }
 664
 665          LLVMBuildCall(builder, function, args, Elements(args), "");
 666
 667          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
 668       }
 669
 670       lp_build_conv(builder,
 671                     lp_float32_vec4_type(),
 672                     type,
 673                     tmps, num_pixels, &res, 1);
 674
 675       return res;
 676    }
 677
 678    assert(0);
 679    return lp_build_undef(type);
 680 }