src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "util/u_cpu_detect.h"
  44 #include "lp_bld_debug.h"
  45 #include "lp_bld_type.h"
  46 #include "lp_bld_const.h"
  47 #include "lp_bld_conv.h"
  48 #include "lp_bld_arit.h"
  49 #include "lp_bld_logic.h"
  50 #include "lp_bld_swizzle.h"
  51 #include "lp_bld_pack.h"
  52 #include "lp_bld_flow.h"
  53 #include "lp_bld_gather.h"
  54 #include "lp_bld_format.h"
  55 #include "lp_bld_sample.h"
  56
  57
  58 /**
  59  * Keep all information for sampling code generation in a single place: the LLVM builder, the sampler state, the texture format and one type/build-context pair per kind of value.
  60  */
  61 struct lp_build_sample_context
  62 {
  63    LLVMBuilderRef builder; /**< LLVM builder the sampling instructions are emitted through */
  64
  65    const struct lp_sampler_static_state *static_state; /**< sampler state read at code generation time (target, wrap modes, swizzles, border color, lod range, ...) */
  66
  67    struct lp_sampler_dynamic_state *dynamic_state;
  68
  69    const struct util_format_description *format_desc; /**< texture format description (block width/height/bits are read from here) */
  70
  71    /** Regular scalar float type and its build context */
  72    struct lp_type float_type;
  73    struct lp_build_context float_bld;
  74
  75    /** Regular scalar int type and its build context */
  76    struct lp_type int_type;
  77    struct lp_build_context int_bld;
  78
  79    /** Incoming coordinates type and build context */
  80    struct lp_type coord_type;
  81    struct lp_build_context coord_bld;
  82
  83    /** Unsigned integer coordinates type and build context */
  84    struct lp_type uint_coord_type;
  85    struct lp_build_context uint_coord_bld;
  86
  87    /** Signed integer coordinates type and build context */
  88    struct lp_type int_coord_type;
  89    struct lp_build_context int_coord_bld;
  90
  91    /** Output texels type and build context */
  92    struct lp_type texel_type;
  93    struct lp_build_context texel_bld;
  94 };
  95
  96
  97 /**
  98  * Does the given texture wrap mode allow sampling the texture border color?
  99  * XXX maybe move this into gallium util code.
 100  */
 101 static boolean
 102 wrap_mode_uses_border_color(unsigned mode)
 103 {
 104    switch (mode) {
 105    case PIPE_TEX_WRAP_REPEAT:
 106    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 107    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 108    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 109       return FALSE;
 110    case PIPE_TEX_WRAP_CLAMP:
 111    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 113    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 114       return TRUE;
 115    default:
 116       assert(0 && "unexpected wrap mode");
 117       return FALSE;
 118    }
 119 }
 120
 121
 122 static LLVMValueRef
 123 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 124                           LLVMValueRef data_array, LLVMValueRef level)
 125 {
 126    LLVMValueRef indexes[2], data_ptr;
 127    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 128    indexes[1] = level;
 129    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 130    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 131    return data_ptr;
 132 }
 133
 134
 135 static LLVMValueRef
 136 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 137                                 LLVMValueRef data_array, int level)
 138 {
 139    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 140    return lp_build_get_mipmap_level(bld, data_array, lvl);
 141 }
 142
 143
 144 /**
 145  * Dereference stride_array[mipmap_level] array to get a stride.
 146  * Return stride as a vector.
 147  */
 148 static LLVMValueRef
 149 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 150                               LLVMValueRef stride_array, LLVMValueRef level)
 151 {
 152    LLVMValueRef indexes[2], stride;
 153    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 154    indexes[1] = level;
 155    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 156    stride = LLVMBuildLoad(bld->builder, stride, "");
 157    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 158    return stride;
 159 }
 160
 161
 162 /** Dereference stride_array[0] array to get a stride (as vector). */
 163 static LLVMValueRef
 164 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 165                                     LLVMValueRef stride_array, int level)
 166 {
 167    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 168    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 169 }
 170
 171
 172 static int
 173 texture_dims(enum pipe_texture_target tex)
 174 {
 175    switch (tex) {
 176    case PIPE_TEXTURE_1D:
 177       return 1;
 178    case PIPE_TEXTURE_2D:
 179    case PIPE_TEXTURE_CUBE:
 180       return 2;
 181    case PIPE_TEXTURE_3D:
 182       return 3;
 183    default:
 184       assert(0 && "bad texture target in texture_dims()");
 185       return 2;
 186    }
 187 }
 188
 189
 190 static void
 191 apply_sampler_swizzle(struct lp_build_sample_context *bld,
 192                       LLVMValueRef *texel)
 193 {
 194    unsigned char swizzles[4];
 195
 196    swizzles[0] = bld->static_state->swizzle_r;
 197    swizzles[1] = bld->static_state->swizzle_g;
 198    swizzles[2] = bld->static_state->swizzle_b;
 199    swizzles[3] = bld->static_state->swizzle_a;
 200
 201    lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
 202 }
 203
 204
 205
 206 /**
 207  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 208  * The computation depends on whether the texture is 1D, 2D or 3D.
 209  * The result, texel, will be:
 210  *   texel[0] = red values
 211  *   texel[1] = green values
 212  *   texel[2] = blue values
 213  *   texel[3] = alpha values
 214  */
 215 static void
 216 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 217                           LLVMValueRef width,
 218                           LLVMValueRef height,
 219                           LLVMValueRef depth,
 220                           LLVMValueRef x,
 221                           LLVMValueRef y,
 222                           LLVMValueRef z,
 223                           LLVMValueRef y_stride,
 224                           LLVMValueRef z_stride,
 225                           LLVMValueRef data_ptr,
 226                           LLVMValueRef texel_out[4])
 227 {
 228    const int dims = texture_dims(bld->static_state->target);
 229    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 230    LLVMValueRef offset;
 231    LLVMValueRef i, j;
 232    LLVMValueRef use_border = NULL;
 233
 234    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 235    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 236       LLVMValueRef b1, b2;
 237       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 238       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 239       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 240    }
 241
 242    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 243       LLVMValueRef b1, b2;
 244       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 245       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 246       if (use_border) {
 247          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 248          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 249       }
 250       else {
 251          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 252       }
 253    }
 254
 255    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 256       LLVMValueRef b1, b2;
 257       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 258       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 259       if (use_border) {
 260          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 261          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 262       }
 263       else {
 264          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 265       }
 266    }
 267
 268    /*
 269     * Describe the coordinates in terms of pixel blocks.
 270     *
 271     * TODO: pixel blocks are power of two. LLVM should convert rem/div to
 272     * bit arithmetic. Verify this.
 273     */
 274
 275    if (bld->format_desc->block.width == 1) {
 276       i = bld->uint_coord_bld.zero;
 277    }
 278    else {
 279       LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
 280       i = LLVMBuildURem(bld->builder, x, block_width, "");
 281       x = LLVMBuildUDiv(bld->builder, x, block_width, "");
 282    }
 283
 284    if (bld->format_desc->block.height == 1) {
 285       j = bld->uint_coord_bld.zero;
 286    }
 287    else {
 288       LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
 289       j = LLVMBuildURem(bld->builder, y, block_height, "");
 290       y = LLVMBuildUDiv(bld->builder, y, block_height, "");
 291    }
 292
 293    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 294    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 295                                    bld->format_desc,
 296                                    x, y, z, y_stride, z_stride);
 297
 298    if (use_border) {
 299       /* If we can sample the border color, it means that texcoords may
 300        * lie outside the bounds of the texture image.  We need to do
 301        * something to prevent reading out of bounds and causing a segfault.
 302        *
 303        * Simply AND the texture coords with !use_border.  This will cause
 304        * coords which are out of bounds to become zero.  Zero's guaranteed
 305        * to be inside the texture image.
 306        */
 307       offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
 308    }
 309
 310    lp_build_fetch_rgba_soa(bld->builder,
 311                            bld->format_desc,
 312                            bld->texel_type,
 313                            data_ptr, offset,
 314                            i, j,
 315                            texel_out);
 316
 317    apply_sampler_swizzle(bld, texel_out);
 318
 319    /*
 320     * Note: if we find an app which frequently samples the texture border
 321     * we might want to implement a true conditional here to avoid sampling
 322     * the texture whenever possible (since that's quite a bit of code).
 323     * Ex:
 324     *   if (use_border) {
 325     *      texel = border_color;
 326     *   }
 327     *   else {
 328     *      texel = sample_texture(coord);
 329     *   }
 330     * As it is now, we always sample the texture, then selectively replace
 331     * the texel color results with the border color.
 332     */
 333
 334    if (use_border) {
 335       /* select texel color or border color depending on use_border */
 336       int chan;
 337       for (chan = 0; chan < 4; chan++) {
 338          LLVMValueRef border_chan =
 339             lp_build_const_vec(bld->texel_type,
 340                                   bld->static_state->border_color[chan]);
 341          texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 342                                            border_chan, texel_out[chan]);
 343       }
 344    }
 345 }
 346
 347
 348 static LLVMValueRef
 349 lp_build_sample_packed(struct lp_build_sample_context *bld,
 350                        LLVMValueRef x,
 351                        LLVMValueRef y,
 352                        LLVMValueRef y_stride,
 353                        LLVMValueRef data_array)
 354 {
 355    LLVMValueRef offset;
 356    LLVMValueRef data_ptr;
 357
 358    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 359                                    bld->format_desc,
 360                                    x, y, NULL, y_stride, NULL);
 361
 362    assert(bld->format_desc->block.width == 1);
 363    assert(bld->format_desc->block.height == 1);
 364    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 365
 366    /* get pointer to mipmap level 0 data */
 367    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 368
 369    return lp_build_gather(bld->builder,
 370                           bld->texel_type.length,
 371                           bld->format_desc->block.bits,
 372                           bld->texel_type.width,
 373                           data_ptr, offset);
 374 }
 375
 376
 377 /**
 378  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 379  */
 380 static LLVMValueRef
 381 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 382                       LLVMValueRef coord)
 383 {
 384    struct lp_build_context *coord_bld = &bld->coord_bld;
 385    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 386    LLVMValueRef fract, flr, isOdd;
 387
 388    /* fract = coord - floor(coord) */
 389    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 390
 391    /* flr = ifloor(coord); */
 392    flr = lp_build_ifloor(coord_bld, coord);
 393
 394    /* isOdd = flr & 1 */
 395    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 396
 397    /* make coord positive or negative depending on isOdd */
 398    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 399
 400    /* convert isOdd to float */
 401    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 402
 403    /* add isOdd to coord */
 404    coord = lp_build_add(coord_bld, coord, isOdd);
 405
 406    return coord;
 407 }
 408
 409
 410 /**
 411  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 412  * Return whether the given mode is supported by that function.
 413  */
 414 static boolean
 415 is_simple_wrap_mode(unsigned mode)
 416 {
 417    switch (mode) {
 418    case PIPE_TEX_WRAP_REPEAT:
 419    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 420       return TRUE;
 421    default:
 422       return FALSE;
 423    }
 424 }
 425
 426
 427 /**
 428  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 429  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 430  * \param length  the texture size along one dimension
 431  * \param is_pot  if TRUE, length is a power of two
 432  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 433  */
 434 static LLVMValueRef
 435 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 436                          LLVMValueRef coord,
 437                          LLVMValueRef length,
 438                          boolean is_pot,
 439                          unsigned wrap_mode)
 440 {
 441    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 442    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 443    LLVMValueRef length_minus_one;
 444
 445    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 446
 447    switch(wrap_mode) {
 448    case PIPE_TEX_WRAP_REPEAT:
 449       if(is_pot)
 450          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 451       else
 452          /* Signed remainder won't give the right results for negative
 453           * dividends but unsigned remainder does.*/
 454          coord = LLVMBuildURem(bld->builder, coord, length, "");
 455       break;
 456
 457    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 458       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 459       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 460       break;
 461
 462    case PIPE_TEX_WRAP_CLAMP:
 463    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 464    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 465    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 466    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 467    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 468    default:
 469       assert(0);
 470    }
 471
 472    return coord;
 473 }
 474
 475
 476 /**
 477  * Build LLVM code for texture wrap mode for linear filtering.
 478  * \param x0_out  returns first integer texcoord
 479  * \param x1_out  returns second integer texcoord
 480  * \param weight_out  returns linear interpolation weight
 481  */
 482 static void
 483 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 484                             LLVMValueRef coord,
 485                             LLVMValueRef length,
 486                             boolean is_pot,
 487                             unsigned wrap_mode,
 488                             LLVMValueRef *x0_out,
 489                             LLVMValueRef *x1_out,
 490                             LLVMValueRef *weight_out)
 491 {
 492    struct lp_build_context *coord_bld = &bld->coord_bld;
 493    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 494    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 495    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 496    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 497    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 498    LLVMValueRef coord0, coord1, weight;
 499
 500    switch(wrap_mode) {
 501    case PIPE_TEX_WRAP_REPEAT:
 502       /* mul by size and subtract 0.5 */
 503       coord = lp_build_mul(coord_bld, coord, length_f);
 504       coord = lp_build_sub(coord_bld, coord, half);
 505       /* convert to int */
 506       coord0 = lp_build_ifloor(coord_bld, coord);
 507       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 508       /* compute lerp weight */
 509       weight = lp_build_fract(coord_bld, coord);
 510       /* repeat wrap */
 511       if (is_pot) {
 512          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 513          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 514       }
 515       else {
 516          /* Signed remainder won't give the right results for negative
 517           * dividends but unsigned remainder does.*/
 518          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 519          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 520       }
 521       break;
 522
 523    case PIPE_TEX_WRAP_CLAMP:
 524       if (bld->static_state->normalized_coords) {
 525          /* scale coord to length */
 526          coord = lp_build_mul(coord_bld, coord, length_f);
 527       }
 528
 529       /* clamp to [0, length] */
 530       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 531
 532       coord = lp_build_sub(coord_bld, coord, half);
 533
 534       weight = lp_build_fract(coord_bld, coord);
 535       coord0 = lp_build_ifloor(coord_bld, coord);
 536       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 537       break;
 538
 539    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 540       if (bld->static_state->normalized_coords) {
 541          /* clamp to [0,1] */
 542          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 543          /* mul by tex size and subtract 0.5 */
 544          coord = lp_build_mul(coord_bld, coord, length_f);
 545          coord = lp_build_sub(coord_bld, coord, half);
 546       }
 547       else {
 548          LLVMValueRef min, max;
 549          /* clamp to [0.5, length - 0.5] */
 550          min = half;
 551          max = lp_build_sub(coord_bld, length_f, min);
 552          coord = lp_build_clamp(coord_bld, coord, min, max);
 553       }
 554       /* compute lerp weight */
 555       weight = lp_build_fract(coord_bld, coord);
 556       /* coord0 = floor(coord); */
 557       coord0 = lp_build_ifloor(coord_bld, coord);
 558       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 559       /* coord0 = max(coord0, 0) */
 560       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 561       /* coord1 = min(coord1, length-1) */
 562       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 563       break;
 564
 565    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 566       {
 567          LLVMValueRef min, max;
 568          if (bld->static_state->normalized_coords) {
 569             /* scale coord to length */
 570             coord = lp_build_mul(coord_bld, coord, length_f);
 571          }
 572          /* clamp to [-0.5, length + 0.5] */
 573          min = lp_build_const_vec(coord_bld->type, -0.5F);
 574          max = lp_build_sub(coord_bld, length_f, min);
 575          coord = lp_build_clamp(coord_bld, coord, min, max);
 576          coord = lp_build_sub(coord_bld, coord, half);
 577          /* compute lerp weight */
 578          weight = lp_build_fract(coord_bld, coord);
 579          /* convert to int */
 580          coord0 = lp_build_ifloor(coord_bld, coord);
 581          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 582       }
 583       break;
 584
 585    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 586       /* compute mirror function */
 587       coord = lp_build_coord_mirror(bld, coord);
 588
 589       /* scale coord to length */
 590       coord = lp_build_mul(coord_bld, coord, length_f);
 591       coord = lp_build_sub(coord_bld, coord, half);
 592
 593       /* compute lerp weight */
 594       weight = lp_build_fract(coord_bld, coord);
 595
 596       /* convert to int coords */
 597       coord0 = lp_build_ifloor(coord_bld, coord);
 598       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 599
 600       /* coord0 = max(coord0, 0) */
 601       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 602       /* coord1 = min(coord1, length-1) */
 603       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 604       break;
 605
 606    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 607       coord = lp_build_abs(coord_bld, coord);
 608
 609       if (bld->static_state->normalized_coords) {
 610          /* scale coord to length */
 611          coord = lp_build_mul(coord_bld, coord, length_f);
 612       }
 613
 614       /* clamp to [0, length] */
 615       coord = lp_build_min(coord_bld, coord, length_f);
 616
 617       coord = lp_build_sub(coord_bld, coord, half);
 618
 619       weight = lp_build_fract(coord_bld, coord);
 620       coord0 = lp_build_ifloor(coord_bld, coord);
 621       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 622       break;
 623
 624    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 625       {
 626          LLVMValueRef min, max;
 627
 628          coord = lp_build_abs(coord_bld, coord);
 629
 630          if (bld->static_state->normalized_coords) {
 631             /* scale coord to length */
 632             coord = lp_build_mul(coord_bld, coord, length_f);
 633          }
 634
 635          /* clamp to [0.5, length - 0.5] */
 636          min = half;
 637          max = lp_build_sub(coord_bld, length_f, min);
 638          coord = lp_build_clamp(coord_bld, coord, min, max);
 639
 640          coord = lp_build_sub(coord_bld, coord, half);
 641
 642          weight = lp_build_fract(coord_bld, coord);
 643          coord0 = lp_build_ifloor(coord_bld, coord);
 644          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 645       }
 646       break;
 647
 648    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 649       {
 650          LLVMValueRef min, max;
 651
 652          coord = lp_build_abs(coord_bld, coord);
 653
 654          if (bld->static_state->normalized_coords) {
 655             /* scale coord to length */
 656             coord = lp_build_mul(coord_bld, coord, length_f);
 657          }
 658
 659          /* clamp to [-0.5, length + 0.5] */
 660          min = lp_build_negate(coord_bld, half);
 661          max = lp_build_sub(coord_bld, length_f, min);
 662          coord = lp_build_clamp(coord_bld, coord, min, max);
 663
 664          coord = lp_build_sub(coord_bld, coord, half);
 665
 666          weight = lp_build_fract(coord_bld, coord);
 667          coord0 = lp_build_ifloor(coord_bld, coord);
 668          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 669       }
 670       break;
 671
 672    default:
 673       assert(0);
 674       coord0 = NULL;
 675       coord1 = NULL;
 676       weight = NULL;
 677    }
 678
 679    *x0_out = coord0;
 680    *x1_out = coord1;
 681    *weight_out = weight;
 682 }
 683
 684
 685 /**
 686  * Build LLVM code for texture wrap mode for nearest filtering.
 687  * \param coord  the incoming texcoord (nominally in [0,1])
 688  * \param length  the texture size along one dimension, as int
 689  * \param is_pot  if TRUE, length is a power of two
 690  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 691  */
 692 static LLVMValueRef
 693 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 694                              LLVMValueRef coord,
 695                              LLVMValueRef length,
 696                              boolean is_pot,
 697                              unsigned wrap_mode)
 698 {
 699    struct lp_build_context *coord_bld = &bld->coord_bld;
 700    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 701    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 702    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 703    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 704    LLVMValueRef icoord;
 705
 706    switch(wrap_mode) {
 707    case PIPE_TEX_WRAP_REPEAT:
 708       coord = lp_build_mul(coord_bld, coord, length_f);
 709       icoord = lp_build_ifloor(coord_bld, coord);
 710       if (is_pot)
 711          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 712       else
 713          /* Signed remainder won't give the right results for negative
 714           * dividends but unsigned remainder does.*/
 715          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 716       break;
 717
 718    case PIPE_TEX_WRAP_CLAMP:
 719    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 720       if (bld->static_state->normalized_coords) {
 721          /* scale coord to length */
 722          coord = lp_build_mul(coord_bld, coord, length_f);
 723       }
 724
 725       /* floor */
 726       icoord = lp_build_ifloor(coord_bld, coord);
 727
 728       /* clamp to [0, length - 1]. */
 729       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 730                               length_minus_one);
 731       break;
 732
 733    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 734       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 735       {
 736          LLVMValueRef min, max;
 737
 738          if (bld->static_state->normalized_coords) {
 739             /* scale coord to length */
 740             coord = lp_build_mul(coord_bld, coord, length_f);
 741          }
 742
 743          icoord = lp_build_ifloor(coord_bld, coord);
 744
 745          /* clamp to [-1, length] */
 746          min = lp_build_negate(int_coord_bld, int_coord_bld->one);
 747          max = length;
 748          icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
 749       }
 750       break;
 751
 752    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 753       /* compute mirror function */
 754       coord = lp_build_coord_mirror(bld, coord);
 755
 756       /* scale coord to length */
 757       assert(bld->static_state->normalized_coords);
 758       coord = lp_build_mul(coord_bld, coord, length_f);
 759
 760       icoord = lp_build_ifloor(coord_bld, coord);
 761
 762       /* clamp to [0, length - 1] */
 763       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 764       break;
 765
 766    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 767    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 768       coord = lp_build_abs(coord_bld, coord);
 769
 770       if (bld->static_state->normalized_coords) {
 771          /* scale coord to length */
 772          coord = lp_build_mul(coord_bld, coord, length_f);
 773       }
 774
 775       icoord = lp_build_ifloor(coord_bld, coord);
 776
 777       /* clamp to [0, length - 1] */
 778       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 779       break;
 780
 781    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 782       coord = lp_build_abs(coord_bld, coord);
 783
 784       if (bld->static_state->normalized_coords) {
 785          /* scale coord to length */
 786          coord = lp_build_mul(coord_bld, coord, length_f);
 787       }
 788
 789       icoord = lp_build_ifloor(coord_bld, coord);
 790
 791       /* clamp to [0, length] */
 792       icoord = lp_build_min(int_coord_bld, icoord, length);
 793       break;
 794
 795    default:
 796       assert(0);
 797       icoord = NULL;
 798    }
 799
 800    return icoord;
 801 }
 802
 803
 804 /**
 805  * Codegen equivalent for u_minify().
 806  * Return max(1, base_size >> level);
 807  */
 808 static LLVMValueRef
 809 lp_build_minify(struct lp_build_sample_context *bld,
 810                 LLVMValueRef base_size,
 811                 LLVMValueRef level)
 812 {
 813    LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
 814    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 815    return size;
 816 }
 817
 818
 819 /**
 820  * Generate code to compute texture level of detail (lambda).
 821  * \param s  vector of texcoord s values
 822  * \param t  vector of texcoord t values
 823  * \param r  vector of texcoord r values
 824  * \param lod_bias  optional float vector with the shader lod bias
 825  * \param explicit_lod  optional float vector with the explicit lod
 826  * \param width  scalar int texture width
 827  * \param height  scalar int texture height
 828  * \param depth  scalar int texture depth
 829  *
 830  * XXX: The resulting lod is scalar, so ignore all but the first element of
 831  * derivatives, lod_bias, etc that are passed by the shader.
 832  */
 833 static LLVMValueRef
 834 lp_build_lod_selector(struct lp_build_sample_context *bld,
 835                       LLVMValueRef s,
 836                       LLVMValueRef t,
 837                       LLVMValueRef r,
 838                       const LLVMValueRef *ddx,
 839                       const LLVMValueRef *ddy,
 840                       LLVMValueRef lod_bias, /* optional */
 841                       LLVMValueRef explicit_lod, /* optional */
 842                       LLVMValueRef width,
 843                       LLVMValueRef height,
 844                       LLVMValueRef depth)
 845
 846 {
 847    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 848       /* User is forcing sampling from a particular mipmap level.
 849        * This is hit during mipmap generation.
 850        */
 851       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 852    }
 853    else {
 854       struct lp_build_context *float_bld = &bld->float_bld;
 855       LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
 856                                                     bld->static_state->lod_bias);
 857       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 858                                            bld->static_state->min_lod);
 859       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 860                                            bld->static_state->max_lod);
 861       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 862       LLVMValueRef lod;
 863
 864       if (explicit_lod) {
 865          lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
 866                                        index0, "");
 867       }
 868       else {
 869          const int dims = texture_dims(bld->static_state->target);
 870          LLVMValueRef dsdx, dsdy;
 871          LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
 872          LLVMValueRef rho;
 873
 874          /*
 875           * dsdx = abs(s[1] - s[0]);
 876           * dsdy = abs(s[2] - s[0]);
 877           * dtdx = abs(t[1] - t[0]);
 878           * dtdy = abs(t[2] - t[0]);
 879           * drdx = abs(r[1] - r[0]);
 880           * drdy = abs(r[2] - r[0]);
 881           */
 882          dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
 883          dsdx = lp_build_abs(float_bld, dsdx);
 884          dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
 885          dsdy = lp_build_abs(float_bld, dsdy);
 886          if (dims > 1) {
 887             dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
 888             dtdx = lp_build_abs(float_bld, dtdx);
 889             dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
 890             dtdy = lp_build_abs(float_bld, dtdy);
 891             if (dims > 2) {
 892                drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
 893                drdx = lp_build_abs(float_bld, drdx);
 894                drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
 895                drdy = lp_build_abs(float_bld, drdy);
 896             }
 897          }
 898
 899          /* Compute rho = max of all partial derivatives scaled by texture size.
 900           * XXX this could be vectorized somewhat
 901           */
 902          rho = LLVMBuildMul(bld->builder,
 903                             lp_build_max(float_bld, dsdx, dsdy),
 904                             lp_build_int_to_float(float_bld, width), "");
 905          if (dims > 1) {
 906             LLVMValueRef max;
 907             max = LLVMBuildMul(bld->builder,
 908                                lp_build_max(float_bld, dtdx, dtdy),
 909                                lp_build_int_to_float(float_bld, height), "");
 910             rho = lp_build_max(float_bld, rho, max);
 911             if (dims > 2) {
 912                max = LLVMBuildMul(bld->builder,
 913                                   lp_build_max(float_bld, drdx, drdy),
 914                                   lp_build_int_to_float(float_bld, depth), "");
 915                rho = lp_build_max(float_bld, rho, max);
 916             }
 917          }
 918
 919          /* compute lod = log2(rho) */
 920          lod = lp_build_log2(float_bld, rho);
 921
 922          /* add shader lod bias */
 923          if (lod_bias) {
 924             lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
 925                                                index0, "");
 926             lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
 927          }
 928       }
 929
 930       /* add sampler lod bias */
 931       lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 932
 933       /* clamp lod */
 934       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 935
 936       return lod;
 937    }
 938 }
 939
 940
 941 /**
 942  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 943  * mipmap level index.
 944  * Note: this is all scalar code.
 945  * \param lod  scalar float texture level of detail
 946  * \param level_out  returns integer
 947  */
 948 static void
 949 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 950                            unsigned unit,
 951                            LLVMValueRef lod,
 952                            LLVMValueRef *level_out)
 953 {
 954    struct lp_build_context *float_bld = &bld->float_bld;
 955    struct lp_build_context *int_bld = &bld->int_bld;
 956    LLVMValueRef last_level, level;
 957
 958    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
 959
 960    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 961                                                bld->builder, unit);
 962
 963    /* convert float lod to integer */
 964    level = lp_build_iround(float_bld, lod);
 965
 966    /* clamp level to legal range of levels */
 967    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 968 }
 969
 970
 971 /**
 972  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 973  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 974  * two mipmap levels and interpolate between them.
 975  */
 976 static void
 977 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 978                            unsigned unit,
 979                            LLVMValueRef lod,
 980                            LLVMValueRef *level0_out,
 981                            LLVMValueRef *level1_out,
 982                            LLVMValueRef *weight_out)
 983 {
 984    struct lp_build_context *float_bld = &bld->float_bld;
 985    struct lp_build_context *int_bld = &bld->int_bld;
 986    LLVMValueRef last_level, level;
 987
 988    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 989                                                bld->builder, unit);
 990
 991    /* convert float lod to integer */
 992    level = lp_build_ifloor(float_bld, lod);
 993
 994    /* compute level 0 and clamp to legal range of levels */
 995    *level0_out = lp_build_clamp(int_bld, level,
 996                                 int_bld->zero,
 997                                 last_level);
 998    /* compute level 1 and clamp to legal range of levels */
 999    level = lp_build_add(int_bld, level, int_bld->one);
1000    *level1_out = lp_build_clamp(int_bld, level,
1001                                 int_bld->zero,
1002                                 last_level);
1003
1004    *weight_out = lp_build_fract(float_bld, lod);
1005 }
1006
1007
1008 /**
1009  * Generate code to sample a mipmap level with nearest filtering.
1010  * If sampling a cube texture, r = cube face in [0,5].
1011  */
1012 static void
1013 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1014                               LLVMValueRef width_vec,
1015                               LLVMValueRef height_vec,
1016                               LLVMValueRef depth_vec,
1017                               LLVMValueRef row_stride_vec,
1018                               LLVMValueRef img_stride_vec,
1019                               LLVMValueRef data_ptr,
1020                               LLVMValueRef s,
1021                               LLVMValueRef t,
1022                               LLVMValueRef r,
1023                               LLVMValueRef colors_out[4])
1024 {
1025    const int dims = texture_dims(bld->static_state->target);
1026    LLVMValueRef x, y, z;
1027
1028    /*
1029     * Compute integer texcoords.
1030     */
1031    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1032                                     bld->static_state->pot_width,
1033                                     bld->static_state->wrap_s);
1034    lp_build_name(x, "tex.x.wrapped");
1035
1036    if (dims >= 2) {
1037       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1038                                        bld->static_state->pot_height,
1039                                        bld->static_state->wrap_t);
1040       lp_build_name(y, "tex.y.wrapped");
1041
1042       if (dims == 3) {
1043          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1044                                           bld->static_state->pot_height,
1045                                           bld->static_state->wrap_r);
1046          lp_build_name(z, "tex.z.wrapped");
1047       }
1048       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1049          z = r;
1050       }
1051       else {
1052          z = NULL;
1053       }
1054    }
1055    else {
1056       y = z = NULL;
1057    }
1058
1059    /*
1060     * Get texture colors.
1061     */
1062    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1063                              x, y, z,
1064                              row_stride_vec, img_stride_vec,
1065                              data_ptr, colors_out);
1066 }
1067
1068
1069 /**
1070  * Generate code to sample a mipmap level with linear filtering.
1071  * If sampling a cube texture, r = cube face in [0,5].
1072  */
1073 static void
1074 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1075                              LLVMValueRef width_vec,
1076                              LLVMValueRef height_vec,
1077                              LLVMValueRef depth_vec,
1078                              LLVMValueRef row_stride_vec,
1079                              LLVMValueRef img_stride_vec,
1080                              LLVMValueRef data_ptr,
1081                              LLVMValueRef s,
1082                              LLVMValueRef t,
1083                              LLVMValueRef r,
1084                              LLVMValueRef colors_out[4])
1085 {
1086    const int dims = texture_dims(bld->static_state->target);
1087    LLVMValueRef x0, y0, z0, x1, y1, z1;
1088    LLVMValueRef s_fpart, t_fpart, r_fpart;
1089    LLVMValueRef neighbors[2][2][4];
1090    int chan;
1091
1092    /*
1093     * Compute integer texcoords.
1094     */
1095    lp_build_sample_wrap_linear(bld, s, width_vec,
1096                                bld->static_state->pot_width,
1097                                bld->static_state->wrap_s,
1098                                &x0, &x1, &s_fpart);
1099    lp_build_name(x0, "tex.x0.wrapped");
1100    lp_build_name(x1, "tex.x1.wrapped");
1101
1102    if (dims >= 2) {
1103       lp_build_sample_wrap_linear(bld, t, height_vec,
1104                                   bld->static_state->pot_height,
1105                                   bld->static_state->wrap_t,
1106                                   &y0, &y1, &t_fpart);
1107       lp_build_name(y0, "tex.y0.wrapped");
1108       lp_build_name(y1, "tex.y1.wrapped");
1109
1110       if (dims == 3) {
1111          lp_build_sample_wrap_linear(bld, r, depth_vec,
1112                                      bld->static_state->pot_depth,
1113                                      bld->static_state->wrap_r,
1114                                      &z0, &z1, &r_fpart);
1115          lp_build_name(z0, "tex.z0.wrapped");
1116          lp_build_name(z1, "tex.z1.wrapped");
1117       }
1118       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1119          z0 = z1 = r;  /* cube face */
1120          r_fpart = NULL;
1121       }
1122       else {
1123          z0 = z1 = NULL;
1124          r_fpart = NULL;
1125       }
1126    }
1127    else {
1128       y0 = y1 = t_fpart = NULL;
1129       z0 = z1 = r_fpart = NULL;
1130    }
1131
1132    /*
1133     * Get texture colors.
1134     */
1135    /* get x0/x1 texels */
1136    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1137                              x0, y0, z0,
1138                              row_stride_vec, img_stride_vec,
1139                              data_ptr, neighbors[0][0]);
1140    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1141                              x1, y0, z0,
1142                              row_stride_vec, img_stride_vec,
1143                              data_ptr, neighbors[0][1]);
1144
1145    if (dims == 1) {
1146       /* Interpolate two samples from 1D image to produce one color */
1147       for (chan = 0; chan < 4; chan++) {
1148          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1149                                           neighbors[0][0][chan],
1150                                           neighbors[0][1][chan]);
1151       }
1152    }
1153    else {
1154       /* 2D/3D texture */
1155       LLVMValueRef colors0[4];
1156
1157       /* get x0/x1 texels at y1 */
1158       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1159                                 x0, y1, z0,
1160                                 row_stride_vec, img_stride_vec,
1161                                 data_ptr, neighbors[1][0]);
1162       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1163                                 x1, y1, z0,
1164                                 row_stride_vec, img_stride_vec,
1165                                 data_ptr, neighbors[1][1]);
1166
1167       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1168       for (chan = 0; chan < 4; chan++) {
1169          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1170                                           s_fpart, t_fpart,
1171                                           neighbors[0][0][chan],
1172                                           neighbors[0][1][chan],
1173                                           neighbors[1][0][chan],
1174                                           neighbors[1][1][chan]);
1175       }
1176
1177       if (dims == 3) {
1178          LLVMValueRef neighbors1[2][2][4];
1179          LLVMValueRef colors1[4];
1180
1181          /* get x0/x1/y0/y1 texels at z1 */
1182          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1183                                    x0, y0, z1,
1184                                    row_stride_vec, img_stride_vec,
1185                                    data_ptr, neighbors1[0][0]);
1186          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1187                                    x1, y0, z1,
1188                                    row_stride_vec, img_stride_vec,
1189                                    data_ptr, neighbors1[0][1]);
1190          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1191                                    x0, y1, z1,
1192                                    row_stride_vec, img_stride_vec,
1193                                    data_ptr, neighbors1[1][0]);
1194          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1195                                    x1, y1, z1,
1196                                    row_stride_vec, img_stride_vec,
1197                                    data_ptr, neighbors1[1][1]);
1198
1199          /* Bilinear interpolate the four samples from the second Z slice */
1200          for (chan = 0; chan < 4; chan++) {
1201             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1202                                              s_fpart, t_fpart,
1203                                              neighbors1[0][0][chan],
1204                                              neighbors1[0][1][chan],
1205                                              neighbors1[1][0][chan],
1206                                              neighbors1[1][1][chan]);
1207          }
1208
1209          /* Linearly interpolate the two samples from the two 3D slices */
1210          for (chan = 0; chan < 4; chan++) {
1211             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1212                                              r_fpart,
1213                                              colors0[chan], colors1[chan]);
1214          }
1215       }
1216       else {
1217          /* 2D tex */
1218          for (chan = 0; chan < 4; chan++) {
1219             colors_out[chan] = colors0[chan];
1220          }
1221       }
1222    }
1223 }
1224
1225
1226 /** Helper used by lp_build_cube_lookup() */
1227 static LLVMValueRef
1228 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1229 {
1230    /* ima = -0.5 / abs(coord); */
1231    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1232    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1233    LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
1234                                    lp_build_rcp(coord_bld, absCoord));
1235    return ima;
1236 }
1237
1238
1239 /**
1240  * Helper used by lp_build_cube_lookup()
1241  * \param sign  scalar +1 or -1
1242  * \param coord  float vector
1243  * \param ima  float vector
1244  */
1245 static LLVMValueRef
1246 lp_build_cube_coord(struct lp_build_context *coord_bld,
1247                     LLVMValueRef sign, int negate_coord,
1248                     LLVMValueRef coord, LLVMValueRef ima)
1249 {
1250    /* return negate(coord) * ima * sign + 0.5; */
1251    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1252    LLVMValueRef res;
1253
1254    assert(negate_coord == +1 || negate_coord == -1);
1255
1256    if (negate_coord == -1) {
1257       coord = lp_build_negate(coord_bld, coord);
1258    }
1259
1260    res = lp_build_mul(coord_bld, coord, ima);
1261    if (sign) {
1262       sign = lp_build_broadcast_scalar(coord_bld, sign);
1263       res = lp_build_mul(coord_bld, res, sign);
1264    }
1265    res = lp_build_add(coord_bld, res, half);
1266
1267    return res;
1268 }
1269
1270
1271 /** Helper used by lp_build_cube_lookup()
1272  * Return (major_coord >= 0) ? pos_face : neg_face;
1273  */
1274 static LLVMValueRef
1275 lp_build_cube_face(struct lp_build_sample_context *bld,
1276                    LLVMValueRef major_coord,
1277                    unsigned pos_face, unsigned neg_face)
1278 {
1279    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1280                                     major_coord,
1281                                     bld->float_bld.zero, "");
1282    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1283    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1284    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1285    return res;
1286 }
1287
1288
1289
1290 /**
1291  * Generate code to do cube face selection and per-face texcoords.
1292  */
1293 static void
1294 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1295                      LLVMValueRef s,
1296                      LLVMValueRef t,
1297                      LLVMValueRef r,
1298                      LLVMValueRef *face,
1299                      LLVMValueRef *face_s,
1300                      LLVMValueRef *face_t)
1301 {
1302    struct lp_build_context *float_bld = &bld->float_bld;
1303    struct lp_build_context *coord_bld = &bld->coord_bld;
1304    LLVMValueRef rx, ry, rz;
1305    LLVMValueRef arx, ary, arz;
1306    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1307    LLVMValueRef arx_ge_ary, arx_ge_arz;
1308    LLVMValueRef ary_ge_arx, ary_ge_arz;
1309    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1310    LLVMValueRef rx_pos, ry_pos, rz_pos;
1311
1312    assert(bld->coord_bld.type.length == 4);
1313
1314    /*
1315     * Use the average of the four pixel's texcoords to choose the face.
1316     */
1317    rx = lp_build_mul(float_bld, c25,
1318                      lp_build_sum_vector(&bld->coord_bld, s));
1319    ry = lp_build_mul(float_bld, c25,
1320                      lp_build_sum_vector(&bld->coord_bld, t));
1321    rz = lp_build_mul(float_bld, c25,
1322                      lp_build_sum_vector(&bld->coord_bld, r));
1323
1324    arx = lp_build_abs(float_bld, rx);
1325    ary = lp_build_abs(float_bld, ry);
1326    arz = lp_build_abs(float_bld, rz);
1327
1328    /*
1329     * Compare sign/magnitude of rx,ry,rz to determine face
1330     */
1331    arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1332    arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1333    ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1334    ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1335
1336    arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1337    ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1338
1339    rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1340    ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1341    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1342
1343    {
1344       struct lp_build_flow_context *flow_ctx;
1345       struct lp_build_if_state if_ctx;
1346
1347       flow_ctx = lp_build_flow_create(bld->builder);
1348       lp_build_flow_scope_begin(flow_ctx);
1349
1350       *face_s = bld->coord_bld.undef;
1351       *face_t = bld->coord_bld.undef;
1352       *face = bld->int_bld.undef;
1353
1354       lp_build_name(*face_s, "face_s");
1355       lp_build_name(*face_t, "face_t");
1356       lp_build_name(*face, "face");
1357
1358       lp_build_flow_scope_declare(flow_ctx, face_s);
1359       lp_build_flow_scope_declare(flow_ctx, face_t);
1360       lp_build_flow_scope_declare(flow_ctx, face);
1361
1362       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1363       {
1364          /* +/- X face */
1365          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1366          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1367          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1368          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1369          *face = lp_build_cube_face(bld, rx,
1370                                     PIPE_TEX_FACE_POS_X,
1371                                     PIPE_TEX_FACE_NEG_X);
1372       }
1373       lp_build_else(&if_ctx);
1374       {
1375          struct lp_build_flow_context *flow_ctx2;
1376          struct lp_build_if_state if_ctx2;
1377
1378          LLVMValueRef face_s2 = bld->coord_bld.undef;
1379          LLVMValueRef face_t2 = bld->coord_bld.undef;
1380          LLVMValueRef face2 = bld->int_bld.undef;
1381
1382          flow_ctx2 = lp_build_flow_create(bld->builder);
1383          lp_build_flow_scope_begin(flow_ctx2);
1384          lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1385          lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1386          lp_build_flow_scope_declare(flow_ctx2, &face2);
1387
1388          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1389
1390          lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1391          {
1392             /* +/- Y face */
1393             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1394             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1395             face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1396             face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1397             face2 = lp_build_cube_face(bld, ry,
1398                                        PIPE_TEX_FACE_POS_Y,
1399                                        PIPE_TEX_FACE_NEG_Y);
1400          }
1401          lp_build_else(&if_ctx2);
1402          {
1403             /* +/- Z face */
1404             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1405             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1406             face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1407             face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1408             face2 = lp_build_cube_face(bld, rz,
1409                                        PIPE_TEX_FACE_POS_Z,
1410                                        PIPE_TEX_FACE_NEG_Z);
1411          }
1412          lp_build_endif(&if_ctx2);
1413          lp_build_flow_scope_end(flow_ctx2);
1414          lp_build_flow_destroy(flow_ctx2);
1415
1416          *face_s = face_s2;
1417          *face_t = face_t2;
1418          *face = face2;
1419       }
1420
1421       lp_build_endif(&if_ctx);
1422       lp_build_flow_scope_end(flow_ctx);
1423       lp_build_flow_destroy(flow_ctx);
1424    }
1425 }
1426
1427
1428
1429 /**
1430  * Sample the texture/mipmap using given image filter and mip filter.
1431  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1432  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1433  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1434  */
1435 static void
1436 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1437                        unsigned img_filter,
1438                        unsigned mip_filter,
1439                        LLVMValueRef s,
1440                        LLVMValueRef t,
1441                        LLVMValueRef r,
1442                        LLVMValueRef lod_fpart,
1443                        LLVMValueRef width0_vec,
1444                        LLVMValueRef width1_vec,
1445                        LLVMValueRef height0_vec,
1446                        LLVMValueRef height1_vec,
1447                        LLVMValueRef depth0_vec,
1448                        LLVMValueRef depth1_vec,
1449                        LLVMValueRef row_stride0_vec,
1450                        LLVMValueRef row_stride1_vec,
1451                        LLVMValueRef img_stride0_vec,
1452                        LLVMValueRef img_stride1_vec,
1453                        LLVMValueRef data_ptr0,
1454                        LLVMValueRef data_ptr1,
1455                        LLVMValueRef *colors_out)
1456 {
1457    LLVMValueRef colors0[4], colors1[4];
1458    int chan;
1459
1460    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1461       lp_build_sample_image_nearest(bld,
1462                                     width0_vec, height0_vec, depth0_vec,
1463                                     row_stride0_vec, img_stride0_vec,
1464                                     data_ptr0, s, t, r, colors0);
1465
1466       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1467          /* sample the second mipmap level, and interp */
1468          lp_build_sample_image_nearest(bld,
1469                                        width1_vec, height1_vec, depth1_vec,
1470                                        row_stride1_vec, img_stride1_vec,
1471                                        data_ptr1, s, t, r, colors1);
1472       }
1473    }
1474    else {
1475       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1476
1477       lp_build_sample_image_linear(bld,
1478                                    width0_vec, height0_vec, depth0_vec,
1479                                    row_stride0_vec, img_stride0_vec,
1480                                    data_ptr0, s, t, r, colors0);
1481
1482       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1483          /* sample the second mipmap level, and interp */
1484          lp_build_sample_image_linear(bld,
1485                                       width1_vec, height1_vec, depth1_vec,
1486                                       row_stride1_vec, img_stride1_vec,
1487                                       data_ptr1, s, t, r, colors1);
1488       }
1489    }
1490
1491    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1492       /* interpolate samples from the two mipmap levels */
1493       for (chan = 0; chan < 4; chan++) {
1494          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1495                                           colors0[chan], colors1[chan]);
1496       }
1497    }
1498    else {
1499       /* use first/only level's colors */
1500       for (chan = 0; chan < 4; chan++) {
1501          colors_out[chan] = colors0[chan];
1502       }
1503    }
1504 }
1505
1506
1507
1508 /**
1509  * General texture sampling codegen.
1510  * This function handles texture sampling for all texture targets (1D,
1511  * 2D, 3D, cube) and all filtering modes.
1512  */
1513 static void
1514 lp_build_sample_general(struct lp_build_sample_context *bld,
1515                         unsigned unit,
1516                         LLVMValueRef s,
1517                         LLVMValueRef t,
1518                         LLVMValueRef r,
1519                         const LLVMValueRef *ddx,
1520                         const LLVMValueRef *ddy,
1521                         LLVMValueRef lod_bias, /* optional */
1522                         LLVMValueRef explicit_lod, /* optional */
1523                         LLVMValueRef width,
1524                         LLVMValueRef height,
1525                         LLVMValueRef depth,
1526                         LLVMValueRef width_vec,
1527                         LLVMValueRef height_vec,
1528                         LLVMValueRef depth_vec,
1529                         LLVMValueRef row_stride_array,
1530                         LLVMValueRef img_stride_array,
1531                         LLVMValueRef data_array,
1532                         LLVMValueRef *colors_out)
1533 {
1534    struct lp_build_context *float_bld = &bld->float_bld;
1535    const unsigned mip_filter = bld->static_state->min_mip_filter;
1536    const unsigned min_filter = bld->static_state->min_img_filter;
1537    const unsigned mag_filter = bld->static_state->mag_img_filter;
1538    const int dims = texture_dims(bld->static_state->target);
1539    LLVMValueRef lod = NULL, lod_fpart = NULL;
1540    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1541    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1542    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1543    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1544    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1545    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1546
1547    /*
1548    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1549           mip_filter, min_filter, mag_filter);
1550    */
1551
1552    /*
1553     * Compute the level of detail (float).
1554     */
1555    if (min_filter != mag_filter ||
1556        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1557       /* Need to compute lod either to choose mipmap levels or to
1558        * distinguish between minification/magnification with one mipmap level.
1559        */
1560       lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
1561                                   lod_bias, explicit_lod,
1562                                   width, height, depth);
1563    }
1564
1565    /*
1566     * Compute integer mipmap level(s) to fetch texels from.
1567     */
1568    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1569       /* always use mip level 0 */
1570       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1571    }
1572    else {
1573       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1574          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1575       }
1576       else {
1577          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1578          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1579                                     &lod_fpart);
1580          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1581       }
1582    }
1583
1584    /*
1585     * Convert scalar integer mipmap levels into vectors.
1586     */
1587    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1588    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1589       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1590
1591    /*
1592     * Compute width, height at mipmap level 'ilevel0'
1593     */
1594    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1595    if (dims >= 2) {
1596       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1597       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1598                                                       ilevel0);
1599       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1600          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1601                                                          img_stride_array,
1602                                                          ilevel0);
1603          if (dims == 3) {
1604             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1605          }
1606       }
1607    }
1608    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1609       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1610       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1611       if (dims >= 2) {
1612          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1613          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1614                                                          ilevel1);
1615          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1616             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1617                                                             img_stride_array,
1618                                                             ilevel1);
1619             if (dims ==3) {
1620                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1621             }
1622          }
1623       }
1624    }
1625
1626    /*
1627     * Choose cube face, recompute per-face texcoords.
1628     */
1629    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1630       LLVMValueRef face, face_s, face_t;
1631       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1632       s = face_s; /* vec */
1633       t = face_t; /* vec */
1634       /* use 'r' to indicate cube face */
1635       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1636    }
1637
1638    /*
1639     * Get pointer(s) to image data for mipmap level(s).
1640     */
1641    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1642    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1643       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1644    }
1645
1646    /*
1647     * Get/interpolate texture colors.
1648     */
1649    if (min_filter == mag_filter) {
1650       /* no need to distinguish between minification and magnification */
1651       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1652                              width0_vec, width1_vec,
1653                              height0_vec, height1_vec,
1654                              depth0_vec, depth1_vec,
1655                              row_stride0_vec, row_stride1_vec,
1656                              img_stride0_vec, img_stride1_vec,
1657                              data_ptr0, data_ptr1,
1658                              colors_out);
1659    }
1660    else {
1661       /* Emit conditional to choose min image filter or mag image filter
1662        * depending on the lod being >0 or <= 0, respectively.
1663        */
1664       struct lp_build_flow_context *flow_ctx;
1665       struct lp_build_if_state if_ctx;
1666       LLVMValueRef minify;
1667
1668       flow_ctx = lp_build_flow_create(bld->builder);
1669       lp_build_flow_scope_begin(flow_ctx);
1670
1671       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1672       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1673       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1674       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1675
1676       /* minify = (lod >= 0.0); UGE is also true if lod is unordered (NaN) */
1677       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1678                              lod, float_bld->zero, "");
1679
1680       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1681       {
1682          /* Use the minification filter */
1683          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1684                                 s, t, r, lod_fpart,
1685                                 width0_vec, width1_vec,
1686                                 height0_vec, height1_vec,
1687                                 depth0_vec, depth1_vec,
1688                                 row_stride0_vec, row_stride1_vec,
1689                                 img_stride0_vec, img_stride1_vec,
1690                                 data_ptr0, data_ptr1,
1691                                 colors_out);
1692       }
1693       lp_build_else(&if_ctx);
1694       {
1695          /* Use the magnification filter */
1696          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1697                                 s, t, r, lod_fpart,
1698                                 width0_vec, width1_vec,
1699                                 height0_vec, height1_vec,
1700                                 depth0_vec, depth1_vec,
1701                                 row_stride0_vec, row_stride1_vec,
1702                                 img_stride0_vec, img_stride1_vec,
1703                                 data_ptr0, data_ptr1,
1704                                 colors_out);
1705       }
1706       lp_build_endif(&if_ctx);
1707
1708       lp_build_flow_scope_end(flow_ctx);
1709       lp_build_flow_destroy(flow_ctx);
1710    }
1711 }
1712
1713
1714
1715 static void
1716 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1717                               LLVMValueRef s,
1718                               LLVMValueRef t,
1719                               LLVMValueRef width,
1720                               LLVMValueRef height,
1721                               LLVMValueRef stride_array,
1722                               LLVMValueRef data_array,
1723                               LLVMValueRef texel_out[4])
1724 {
1725    LLVMBuilderRef builder = bld->builder;
1726    struct lp_build_context i32, h16, u8n;
1727    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1728    LLVMValueRef i32_c8, i32_c128, i32_c255;
1729    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1730    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1731    LLVMValueRef x0, x1;
1732    LLVMValueRef y0, y1;
1733    LLVMValueRef neighbors[2][2];
1734    LLVMValueRef neighbors_lo[2][2];
1735    LLVMValueRef neighbors_hi[2][2];
1736    LLVMValueRef packed, packed_lo, packed_hi;
1737    LLVMValueRef unswizzled[4];
1738    LLVMValueRef stride;
1739
1740    assert(bld->static_state->target == PIPE_TEXTURE_2D);
1741    assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1742    assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1743    assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1744
1745    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1746    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1747    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1748
1749    i32_vec_type = lp_build_vec_type(i32.type);
1750    h16_vec_type = lp_build_vec_type(h16.type);
1751    u8n_vec_type = lp_build_vec_type(u8n.type);
1752
1753    if (bld->static_state->normalized_coords) {
1754       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1755       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1756       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1757       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1758       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1759    }
1760
1761    /* scale coords by 256 (8 fractional bits) */
1762    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1763    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1764
1765    /* convert float to int */
1766    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1767    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1768
1769    /* subtract 0.5 (add -128) */
1770    i32_c128 = lp_build_const_int_vec(i32.type, -128);
1771    s = LLVMBuildAdd(builder, s, i32_c128, "");
1772    t = LLVMBuildAdd(builder, t, i32_c128, "");
1773
1774    /* compute floor (shift right 8) */
1775    i32_c8 = lp_build_const_int_vec(i32.type, 8);
1776    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1777    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1778
1779    /* compute fractional part (AND with 0xff) */
1780    i32_c255 = lp_build_const_int_vec(i32.type, 255);
1781    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1782    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1783
1784    x0 = s_ipart;
1785    y0 = t_ipart;
1786
1787    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1788    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1789
1790    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1791                                  bld->static_state->wrap_s);
1792    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1793                                  bld->static_state->wrap_t);
1794
1795    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1796                                  bld->static_state->wrap_s);
1797    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1798                                  bld->static_state->wrap_t);
1799
1800    /*
1801     * Transform 4 x i32 in
1802     *
1803     *   s_fpart = {s0, s1, s2, s3}
1804     *
1805     * into 8 x i16
1806     *
1807     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1808     *
1809     * into two 8 x i16
1810     *
1811     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1812     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1813     *
1814     * and likewise for t_fpart. There is no risk of loosing precision here
1815     * since the fractional parts only use the lower 8bits.
1816     */
1817
1818    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1819    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1820
1821    {
1822       LLVMTypeRef elem_type = LLVMInt32Type();
1823       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1824       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1825       LLVMValueRef shuffle_lo;
1826       LLVMValueRef shuffle_hi;
1827       unsigned i, j;
1828
1829       for(j = 0; j < h16.type.length; j += 4) {
1830          unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1831          LLVMValueRef index;
1832
1833          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1834          for(i = 0; i < 4; ++i)
1835             shuffles_lo[j + i] = index;
1836
1837          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1838          for(i = 0; i < 4; ++i)
1839             shuffles_hi[j + i] = index;
1840       }
1841
1842       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1843       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1844
1845       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1846       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1847       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1848       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1849    }
1850
1851    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1852
1853    /*
1854     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1855     *
1856     *   rgba0 rgba1 rgba2 rgba3
1857     *
1858     * bit cast them into 16 x u8
1859     *
1860     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1861     *
1862     * unpack them into two 8 x i16:
1863     *
1864     *   r0 g0 b0 a0 r1 g1 b1 a1
1865     *   r2 g2 b2 a2 r3 g3 b3 a3
1866     *
1867     * The higher 8 bits of the resulting elements will be zero.
1868     */
1869
1870    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1871    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1872    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1873    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1874
1875    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1876    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1877    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1878    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1879
1880    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1881    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1882    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1883    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1884
1885    /*
1886     * Linear interpolate with 8.8 fixed point.
1887     */
1888
1889    packed_lo = lp_build_lerp_2d(&h16,
1890                                 s_fpart_lo, t_fpart_lo,
1891                                 neighbors_lo[0][0],
1892                                 neighbors_lo[0][1],
1893                                 neighbors_lo[1][0],
1894                                 neighbors_lo[1][1]);
1895
1896    packed_hi = lp_build_lerp_2d(&h16,
1897                                 s_fpart_hi, t_fpart_hi,
1898                                 neighbors_hi[0][0],
1899                                 neighbors_hi[0][1],
1900                                 neighbors_hi[1][0],
1901                                 neighbors_hi[1][1]);
1902
1903    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1904
1905    /*
1906     * Convert to SoA and swizzle.
1907     */
1908
1909    lp_build_rgba8_to_f32_soa(bld->builder,
1910                              bld->texel_type,
1911                              packed, unswizzled);
1912
1913    lp_build_format_swizzle_soa(bld->format_desc,
1914                                &bld->texel_bld,
1915                                unswizzled, texel_out);
1916
1917    apply_sampler_swizzle(bld, texel_out);
1918 }
1919
1920
1921 static void
1922 lp_build_sample_compare(struct lp_build_sample_context *bld,
1923                         LLVMValueRef p,
1924                         LLVMValueRef texel[4])
1925 {
1926    struct lp_build_context *texel_bld = &bld->texel_bld;
1927    LLVMValueRef res;
1928    unsigned chan;
1929
1930    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1931       return;
1932
1933    /* TODO: Compare before swizzling, to avoid redundant computations */
1934    res = NULL;
1935    for(chan = 0; chan < 4; ++chan) {
1936       LLVMValueRef cmp;
1937       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1938       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1939
1940       if(res)
1941          res = lp_build_add(texel_bld, res, cmp);
1942       else
1943          res = cmp;
1944    }
1945
1946    assert(res);
1947    res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1948
1949    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1950    for(chan = 0; chan < 3; ++chan)
1951       texel[chan] = res;
1952    texel[3] = texel_bld->one;
1953 }
1954
1955
1956 /**
1957  * Just set texels to white instead of actually sampling the texture.
1958  * For debugging.
1959  */
1960 static void
1961 lp_build_sample_nop(struct lp_build_sample_context *bld,
1962                     LLVMValueRef texel_out[4])
1963 {
1964    struct lp_build_context *texel_bld = &bld->texel_bld;
1965    unsigned chan;
1966
1967    for (chan = 0; chan < 4; chan++) {
1968       /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
1969       texel_out[chan] = texel_bld->one;
1970    }
1971 }
1972
1973
1974 /**
1975  * Build texture sampling code.
1976  * 'texel' will return a vector of four LLVMValueRefs corresponding to
1977  * R, G, B, A.
1978  * \param type  vector float type to use for coords, etc.
1979  */
1980 void
1981 lp_build_sample_soa(LLVMBuilderRef builder,
1982                     const struct lp_sampler_static_state *static_state,
1983                     struct lp_sampler_dynamic_state *dynamic_state,
1984                     struct lp_type type,
1985                     unsigned unit,
1986                     unsigned num_coords,
1987                     const LLVMValueRef *coords,
1988                     const LLVMValueRef *ddx,
1989                     const LLVMValueRef *ddy,
1990                     LLVMValueRef lod_bias, /* optional */
1991                     LLVMValueRef explicit_lod, /* optional */
1992                     LLVMValueRef texel_out[4])
1993 {
1994    struct lp_build_sample_context bld;
1995    LLVMValueRef width, width_vec;
1996    LLVMValueRef height, height_vec;
1997    LLVMValueRef depth, depth_vec;
1998    LLVMValueRef row_stride_array, img_stride_array;
1999    LLVMValueRef data_array;
2000    LLVMValueRef s;
2001    LLVMValueRef t;
2002    LLVMValueRef r;
2003
2004    if (0) {
2005       enum pipe_format fmt = static_state->format;
2006       debug_printf("Sample from %s\n", util_format_name(fmt));
2007    }
2008
2009    /* Setup our build context */
2010    memset(&bld, 0, sizeof bld);
2011    bld.builder = builder;
2012    bld.static_state = static_state;
2013    bld.dynamic_state = dynamic_state;
2014    bld.format_desc = util_format_description(static_state->format);
2015
2016    bld.float_type = lp_type_float(32);
2017    bld.int_type = lp_type_int(32);
2018    bld.coord_type = type;
2019    bld.uint_coord_type = lp_uint_type(type);
2020    bld.int_coord_type = lp_int_type(type);
2021    bld.texel_type = type;
2022
2023    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2024    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2025    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2026    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2027    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2028    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2029
2030    /* Get the dynamic state */
2031    width = dynamic_state->width(dynamic_state, builder, unit);
2032    height = dynamic_state->height(dynamic_state, builder, unit);
2033    depth = dynamic_state->depth(dynamic_state, builder, unit);
2034    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2035    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2036    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2037    /* Note that data_array is an array[level] of pointers to texture images */
2038
2039    s = coords[0];
2040    t = coords[1];
2041    r = coords[2];
2042
2043    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2044    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2045    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2046
2047    if (0) {
2048       /* For debug: no-op texture sampling */
2049       lp_build_sample_nop(&bld, texel_out);
2050    }
2051    else if (util_format_is_rgba8_variant(bld.format_desc) &&
2052             static_state->target == PIPE_TEXTURE_2D &&
2053             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2054             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2055             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2056             is_simple_wrap_mode(static_state->wrap_s) &&
2057             is_simple_wrap_mode(static_state->wrap_t)) {
2058       /* special case */
2059       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2060                                     row_stride_array, data_array, texel_out);
2061    }
2062    else {
2063       lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2064                               lod_bias, explicit_lod,
2065                               width, height, depth,
2066                               width_vec, height_vec, depth_vec,
2067                               row_stride_array, img_stride_array,
2068                               data_array,
2069                               texel_out);
2070    }
2071
2072    lp_build_sample_compare(&bld, r, texel_out);
2073 }