src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "util/u_cpu_detect.h"
  44 #include "lp_bld_debug.h"
  45 #include "lp_bld_type.h"
  46 #include "lp_bld_const.h"
  47 #include "lp_bld_conv.h"
  48 #include "lp_bld_arit.h"
  49 #include "lp_bld_logic.h"
  50 #include "lp_bld_swizzle.h"
  51 #include "lp_bld_pack.h"
  52 #include "lp_bld_flow.h"
  53 #include "lp_bld_format.h"
  54 #include "lp_bld_sample.h"
  55
  56
  /**
   * Keep all information for sampling code generation in a single place.
   *
   * The sampling helpers in this file receive a pointer to this context and
   * take the LLVM builder, the sampler state and the per-type build contexts
   * from it.
   */
  struct lp_build_sample_context
  {
     /** LLVM IR builder the sampling helpers emit instructions into */
     LLVMBuilderRef builder;

     /** Sampler state read while generating code (wrap modes, texture
      *  target, swizzles, border color, lod settings, normalized_coords) */
     const struct lp_sampler_static_state *static_state;

     /** NOTE(review): presumably the source of values only known at run
      *  time; its use is not visible in this part of the file -- confirm */
     struct lp_sampler_dynamic_state *dynamic_state;

     /** Description of the texture format (e.g. pixel block width/height/bits) */
     const struct util_format_description *format_desc;

     /** regular scalar float type */
     struct lp_type float_type;
     struct lp_build_context float_bld;

     /** regular scalar int type
      *  (the previous comment said "float", apparently a copy/paste slip) */
     struct lp_type int_type;
     struct lp_build_context int_bld;

     /** Incoming coordinates type and build context */
     struct lp_type coord_type;
     struct lp_build_context coord_bld;

     /** Unsigned integer coordinates */
     struct lp_type uint_coord_type;
     struct lp_build_context uint_coord_bld;

     /** Signed integer coordinates */
     struct lp_type int_coord_type;
     struct lp_build_context int_coord_bld;

     /** Output texels type and build context */
     struct lp_type texel_type;
     struct lp_build_context texel_bld;
  };
  94
  95
  96 /**
  97  * Does the given texture wrap mode allow sampling the texture border color?
  98  * XXX maybe move this into gallium util code.
  99  */
 100 static boolean
 101 wrap_mode_uses_border_color(unsigned mode)
 102 {
 103    switch (mode) {
 104    case PIPE_TEX_WRAP_REPEAT:
 105    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 106    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 107    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 108       return FALSE;
 109    case PIPE_TEX_WRAP_CLAMP:
 110    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 111    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 113       return TRUE;
 114    default:
 115       assert(0 && "unexpected wrap mode");
 116       return FALSE;
 117    }
 118 }
 119
 120
 121 static LLVMValueRef
 122 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 123                           LLVMValueRef data_array, LLVMValueRef level)
 124 {
 125    LLVMValueRef indexes[2], data_ptr;
 126    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 127    indexes[1] = level;
 128    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 129    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 130    return data_ptr;
 131 }
 132
 133
 134 static LLVMValueRef
 135 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 136                                 LLVMValueRef data_array, int level)
 137 {
 138    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 139    return lp_build_get_mipmap_level(bld, data_array, lvl);
 140 }
 141
 142
 143 /**
 144  * Dereference stride_array[mipmap_level] array to get a stride.
 145  * Return stride as a vector.
 146  */
 147 static LLVMValueRef
 148 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 149                               LLVMValueRef stride_array, LLVMValueRef level)
 150 {
 151    LLVMValueRef indexes[2], stride;
 152    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 153    indexes[1] = level;
 154    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 155    stride = LLVMBuildLoad(bld->builder, stride, "");
 156    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 157    return stride;
 158 }
 159
 160
 161 /** Dereference stride_array[0] array to get a stride (as vector). */
 162 static LLVMValueRef
 163 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 164                                     LLVMValueRef stride_array, int level)
 165 {
 166    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 167    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 168 }
 169
 170
 171 static int
 172 texture_dims(enum pipe_texture_target tex)
 173 {
 174    switch (tex) {
 175    case PIPE_TEXTURE_1D:
 176       return 1;
 177    case PIPE_TEXTURE_2D:
 178    case PIPE_TEXTURE_CUBE:
 179       return 2;
 180    case PIPE_TEXTURE_3D:
 181       return 3;
 182    default:
 183       assert(0 && "bad texture target in texture_dims()");
 184       return 2;
 185    }
 186 }
 187
 188
 189 static void
 190 apply_sampler_swizzle(struct lp_build_sample_context *bld,
 191                       LLVMValueRef *texel)
 192 {
 193    unsigned char swizzles[4];
 194
 195    swizzles[0] = bld->static_state->swizzle_r;
 196    swizzles[1] = bld->static_state->swizzle_g;
 197    swizzles[2] = bld->static_state->swizzle_b;
 198    swizzles[3] = bld->static_state->swizzle_a;
 199
 200    lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
 201 }
 202
 203
 204
 205 /**
 206  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 207  * The computation depends on whether the texture is 1D, 2D or 3D.
 208  * The result, texel, will be:
 209  *   texel[0] = red values
 210  *   texel[1] = green values
 211  *   texel[2] = blue values
 212  *   texel[3] = alpha values
 213  */
 214 static void
 215 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 216                           LLVMValueRef width,
 217                           LLVMValueRef height,
 218                           LLVMValueRef depth,
 219                           LLVMValueRef x,
 220                           LLVMValueRef y,
 221                           LLVMValueRef z,
 222                           LLVMValueRef y_stride,
 223                           LLVMValueRef z_stride,
 224                           LLVMValueRef data_ptr,
 225                           LLVMValueRef texel_out[4])
 226 {
 227    const int dims = texture_dims(bld->static_state->target);
 228    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 229    LLVMValueRef offset;
 230    LLVMValueRef i, j;
 231    LLVMValueRef use_border = NULL;
 232
 233    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 234    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 235       LLVMValueRef b1, b2;
 236       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 237       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 238       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 239    }
 240
 241    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 242       LLVMValueRef b1, b2;
 243       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 244       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 245       if (use_border) {
 246          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 247          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 248       }
 249       else {
 250          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 251       }
 252    }
 253
 254    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 255       LLVMValueRef b1, b2;
 256       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 257       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 258       if (use_border) {
 259          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 260          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 261       }
 262       else {
 263          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 264       }
 265    }
 266
 267    /*
 268     * Describe the coordinates in terms of pixel blocks.
 269     *
 270     * TODO: pixel blocks are power of two. LLVM should convert rem/div to
 271     * bit arithmetic. Verify this.
 272     */
 273
 274    if (bld->format_desc->block.width == 1) {
 275       i = bld->uint_coord_bld.zero;
 276    }
 277    else {
 278       LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
 279       i = LLVMBuildURem(bld->builder, x, block_width, "");
 280       x = LLVMBuildUDiv(bld->builder, x, block_width, "");
 281    }
 282
 283    if (bld->format_desc->block.height == 1) {
 284       j = bld->uint_coord_bld.zero;
 285    }
 286    else {
 287       LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
 288       j = LLVMBuildURem(bld->builder, y, block_height, "");
 289       y = LLVMBuildUDiv(bld->builder, y, block_height, "");
 290    }
 291
 292    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 293    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 294                                    bld->format_desc,
 295                                    x, y, z, y_stride, z_stride);
 296
 297    if (use_border) {
 298       /* If we can sample the border color, it means that texcoords may
 299        * lie outside the bounds of the texture image.  We need to do
 300        * something to prevent reading out of bounds and causing a segfault.
 301        *
 302        * Simply AND the texture coords with !use_border.  This will cause
 303        * coords which are out of bounds to become zero.  Zero's guaranteed
 304        * to be inside the texture image.
 305        */
 306       offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
 307    }
 308
 309    lp_build_fetch_rgba_soa(bld->builder,
 310                            bld->format_desc,
 311                            bld->texel_type,
 312                            data_ptr, offset,
 313                            i, j,
 314                            texel_out);
 315
 316    apply_sampler_swizzle(bld, texel_out);
 317
 318    /*
 319     * Note: if we find an app which frequently samples the texture border
 320     * we might want to implement a true conditional here to avoid sampling
 321     * the texture whenever possible (since that's quite a bit of code).
 322     * Ex:
 323     *   if (use_border) {
 324     *      texel = border_color;
 325     *   }
 326     *   else {
 327     *      texel = sample_texture(coord);
 328     *   }
 329     * As it is now, we always sample the texture, then selectively replace
 330     * the texel color results with the border color.
 331     */
 332
 333    if (use_border) {
 334       /* select texel color or border color depending on use_border */
 335       int chan;
 336       for (chan = 0; chan < 4; chan++) {
 337          LLVMValueRef border_chan =
 338             lp_build_const_vec(bld->texel_type,
 339                                   bld->static_state->border_color[chan]);
 340          texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 341                                            border_chan, texel_out[chan]);
 342       }
 343    }
 344 }
 345
 346
 347 static LLVMValueRef
 348 lp_build_sample_packed(struct lp_build_sample_context *bld,
 349                        LLVMValueRef x,
 350                        LLVMValueRef y,
 351                        LLVMValueRef y_stride,
 352                        LLVMValueRef data_array)
 353 {
 354    LLVMValueRef offset;
 355    LLVMValueRef data_ptr;
 356
 357    offset = lp_build_sample_offset(&bld->uint_coord_bld,
 358                                    bld->format_desc,
 359                                    x, y, NULL, y_stride, NULL);
 360
 361    assert(bld->format_desc->block.width == 1);
 362    assert(bld->format_desc->block.height == 1);
 363    assert(bld->format_desc->block.bits <= bld->texel_type.width);
 364
 365    /* get pointer to mipmap level 0 data */
 366    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 367
 368    return lp_build_gather(bld->builder,
 369                           bld->texel_type.length,
 370                           bld->format_desc->block.bits,
 371                           bld->texel_type.width,
 372                           data_ptr, offset);
 373 }
 374
 375
 376 /**
 377  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 378  */
 379 static LLVMValueRef
 380 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 381                       LLVMValueRef coord)
 382 {
 383    struct lp_build_context *coord_bld = &bld->coord_bld;
 384    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 385    LLVMValueRef fract, flr, isOdd;
 386
 387    /* fract = coord - floor(coord) */
 388    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 389
 390    /* flr = ifloor(coord); */
 391    flr = lp_build_ifloor(coord_bld, coord);
 392
 393    /* isOdd = flr & 1 */
 394    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 395
 396    /* make coord positive or negative depending on isOdd */
 397    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 398
 399    /* convert isOdd to float */
 400    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 401
 402    /* add isOdd to coord */
 403    coord = lp_build_add(coord_bld, coord, isOdd);
 404
 405    return coord;
 406 }
 407
 408
 409 /**
 410  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 411  * Return whether the given mode is supported by that function.
 412  */
 413 static boolean
 414 is_simple_wrap_mode(unsigned mode)
 415 {
 416    switch (mode) {
 417    case PIPE_TEX_WRAP_REPEAT:
 418    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 419       return TRUE;
 420    default:
 421       return FALSE;
 422    }
 423 }
 424
 425
 426 /**
 427  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 428  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 429  * \param length  the texture size along one dimension
 430  * \param is_pot  if TRUE, length is a power of two
 431  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 432  */
 433 static LLVMValueRef
 434 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 435                          LLVMValueRef coord,
 436                          LLVMValueRef length,
 437                          boolean is_pot,
 438                          unsigned wrap_mode)
 439 {
 440    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 441    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 442    LLVMValueRef length_minus_one;
 443
 444    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 445
 446    switch(wrap_mode) {
 447    case PIPE_TEX_WRAP_REPEAT:
 448       if(is_pot)
 449          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 450       else
 451          /* Signed remainder won't give the right results for negative
 452           * dividends but unsigned remainder does.*/
 453          coord = LLVMBuildURem(bld->builder, coord, length, "");
 454       break;
 455
 456    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 457       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 458       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 459       break;
 460
 461    case PIPE_TEX_WRAP_CLAMP:
 462    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 463    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 464    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 465    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 466    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 467    default:
 468       assert(0);
 469    }
 470
 471    return coord;
 472 }
 473
 474
 475 /**
 476  * Build LLVM code for texture wrap mode for linear filtering.
 477  * \param x0_out  returns first integer texcoord
 478  * \param x1_out  returns second integer texcoord
 479  * \param weight_out  returns linear interpolation weight
 480  */
 481 static void
 482 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 483                             LLVMValueRef coord,
 484                             LLVMValueRef length,
 485                             boolean is_pot,
 486                             unsigned wrap_mode,
 487                             LLVMValueRef *x0_out,
 488                             LLVMValueRef *x1_out,
 489                             LLVMValueRef *weight_out)
 490 {
 491    struct lp_build_context *coord_bld = &bld->coord_bld;
 492    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 493    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 494    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 495    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 496    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 497    LLVMValueRef coord0, coord1, weight;
 498
 499    switch(wrap_mode) {
 500    case PIPE_TEX_WRAP_REPEAT:
 501       /* mul by size and subtract 0.5 */
 502       coord = lp_build_mul(coord_bld, coord, length_f);
 503       coord = lp_build_sub(coord_bld, coord, half);
 504       /* convert to int */
 505       coord0 = lp_build_ifloor(coord_bld, coord);
 506       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 507       /* compute lerp weight */
 508       weight = lp_build_fract(coord_bld, coord);
 509       /* repeat wrap */
 510       if (is_pot) {
 511          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 512          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 513       }
 514       else {
 515          /* Signed remainder won't give the right results for negative
 516           * dividends but unsigned remainder does.*/
 517          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 518          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 519       }
 520       break;
 521
 522    case PIPE_TEX_WRAP_CLAMP:
 523       if (bld->static_state->normalized_coords) {
 524          /* scale coord to length */
 525          coord = lp_build_mul(coord_bld, coord, length_f);
 526       }
 527
 528       /* clamp to [0, length] */
 529       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 530
 531       coord = lp_build_sub(coord_bld, coord, half);
 532
 533       weight = lp_build_fract(coord_bld, coord);
 534       coord0 = lp_build_ifloor(coord_bld, coord);
 535       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 536       break;
 537
 538    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 539       if (bld->static_state->normalized_coords) {
 540          /* clamp to [0,1] */
 541          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 542          /* mul by tex size and subtract 0.5 */
 543          coord = lp_build_mul(coord_bld, coord, length_f);
 544          coord = lp_build_sub(coord_bld, coord, half);
 545       }
 546       else {
 547          LLVMValueRef min, max;
 548          /* clamp to [0.5, length - 0.5] */
 549          min = half;
 550          max = lp_build_sub(coord_bld, length_f, min);
 551          coord = lp_build_clamp(coord_bld, coord, min, max);
 552       }
 553       /* compute lerp weight */
 554       weight = lp_build_fract(coord_bld, coord);
 555       /* coord0 = floor(coord); */
 556       coord0 = lp_build_ifloor(coord_bld, coord);
 557       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 558       /* coord0 = max(coord0, 0) */
 559       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 560       /* coord1 = min(coord1, length-1) */
 561       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 562       break;
 563
 564    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 565       {
 566          LLVMValueRef min, max;
 567          if (bld->static_state->normalized_coords) {
 568             /* scale coord to length */
 569             coord = lp_build_mul(coord_bld, coord, length_f);
 570          }
 571          /* clamp to [-0.5, length + 0.5] */
 572          min = lp_build_const_vec(coord_bld->type, -0.5F);
 573          max = lp_build_sub(coord_bld, length_f, min);
 574          coord = lp_build_clamp(coord_bld, coord, min, max);
 575          coord = lp_build_sub(coord_bld, coord, half);
 576          /* compute lerp weight */
 577          weight = lp_build_fract(coord_bld, coord);
 578          /* convert to int */
 579          coord0 = lp_build_ifloor(coord_bld, coord);
 580          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 581       }
 582       break;
 583
 584    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 585       /* compute mirror function */
 586       coord = lp_build_coord_mirror(bld, coord);
 587
 588       /* scale coord to length */
 589       coord = lp_build_mul(coord_bld, coord, length_f);
 590       coord = lp_build_sub(coord_bld, coord, half);
 591
 592       /* compute lerp weight */
 593       weight = lp_build_fract(coord_bld, coord);
 594
 595       /* convert to int coords */
 596       coord0 = lp_build_ifloor(coord_bld, coord);
 597       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 598
 599       /* coord0 = max(coord0, 0) */
 600       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 601       /* coord1 = min(coord1, length-1) */
 602       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 603       break;
 604
 605    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 606       coord = lp_build_abs(coord_bld, coord);
 607
 608       if (bld->static_state->normalized_coords) {
 609          /* scale coord to length */
 610          coord = lp_build_mul(coord_bld, coord, length_f);
 611       }
 612
 613       /* clamp to [0, length] */
 614       coord = lp_build_min(coord_bld, coord, length_f);
 615
 616       coord = lp_build_sub(coord_bld, coord, half);
 617
 618       weight = lp_build_fract(coord_bld, coord);
 619       coord0 = lp_build_ifloor(coord_bld, coord);
 620       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 621       break;
 622
 623    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 624       {
 625          LLVMValueRef min, max;
 626
 627          coord = lp_build_abs(coord_bld, coord);
 628
 629          if (bld->static_state->normalized_coords) {
 630             /* scale coord to length */
 631             coord = lp_build_mul(coord_bld, coord, length_f);
 632          }
 633
 634          /* clamp to [0.5, length - 0.5] */
 635          min = half;
 636          max = lp_build_sub(coord_bld, length_f, min);
 637          coord = lp_build_clamp(coord_bld, coord, min, max);
 638
 639          coord = lp_build_sub(coord_bld, coord, half);
 640
 641          weight = lp_build_fract(coord_bld, coord);
 642          coord0 = lp_build_ifloor(coord_bld, coord);
 643          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 644       }
 645       break;
 646
 647    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 648       {
 649          LLVMValueRef min, max;
 650
 651          coord = lp_build_abs(coord_bld, coord);
 652
 653          if (bld->static_state->normalized_coords) {
 654             /* scale coord to length */
 655             coord = lp_build_mul(coord_bld, coord, length_f);
 656          }
 657
 658          /* clamp to [-0.5, length + 0.5] */
 659          min = lp_build_negate(coord_bld, half);
 660          max = lp_build_sub(coord_bld, length_f, min);
 661          coord = lp_build_clamp(coord_bld, coord, min, max);
 662
 663          coord = lp_build_sub(coord_bld, coord, half);
 664
 665          weight = lp_build_fract(coord_bld, coord);
 666          coord0 = lp_build_ifloor(coord_bld, coord);
 667          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 668       }
 669       break;
 670
 671    default:
 672       assert(0);
 673       coord0 = NULL;
 674       coord1 = NULL;
 675       weight = NULL;
 676    }
 677
 678    *x0_out = coord0;
 679    *x1_out = coord1;
 680    *weight_out = weight;
 681 }
 682
 683
 684 /**
 685  * Build LLVM code for texture wrap mode for nearest filtering.
 686  * \param coord  the incoming texcoord (nominally in [0,1])
 687  * \param length  the texture size along one dimension, as int
 688  * \param is_pot  if TRUE, length is a power of two
 689  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 690  */
 691 static LLVMValueRef
 692 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 693                              LLVMValueRef coord,
 694                              LLVMValueRef length,
 695                              boolean is_pot,
 696                              unsigned wrap_mode)
 697 {
 698    struct lp_build_context *coord_bld = &bld->coord_bld;
 699    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 700    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 701    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 702    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 703    LLVMValueRef icoord;
 704
 705    switch(wrap_mode) {
 706    case PIPE_TEX_WRAP_REPEAT:
 707       coord = lp_build_mul(coord_bld, coord, length_f);
 708       icoord = lp_build_ifloor(coord_bld, coord);
 709       if (is_pot)
 710          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 711       else
 712          /* Signed remainder won't give the right results for negative
 713           * dividends but unsigned remainder does.*/
 714          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 715       break;
 716
 717    case PIPE_TEX_WRAP_CLAMP:
 718    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 719       if (bld->static_state->normalized_coords) {
 720          /* scale coord to length */
 721          coord = lp_build_mul(coord_bld, coord, length_f);
 722       }
 723
 724       /* floor */
 725       icoord = lp_build_ifloor(coord_bld, coord);
 726
 727       /* clamp to [0, length - 1]. */
 728       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 729                               length_minus_one);
 730       break;
 731
 732    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 733       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 734       {
 735          LLVMValueRef min, max;
 736
 737          if (bld->static_state->normalized_coords) {
 738             /* scale coord to length */
 739             coord = lp_build_mul(coord_bld, coord, length_f);
 740          }
 741
 742          icoord = lp_build_ifloor(coord_bld, coord);
 743
 744          /* clamp to [-1, length] */
 745          min = lp_build_negate(int_coord_bld, int_coord_bld->one);
 746          max = length;
 747          icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
 748       }
 749       break;
 750
 751    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 752       /* compute mirror function */
 753       coord = lp_build_coord_mirror(bld, coord);
 754
 755       /* scale coord to length */
 756       assert(bld->static_state->normalized_coords);
 757       coord = lp_build_mul(coord_bld, coord, length_f);
 758
 759       icoord = lp_build_ifloor(coord_bld, coord);
 760
 761       /* clamp to [0, length - 1] */
 762       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 763       break;
 764
 765    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 766    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 767       coord = lp_build_abs(coord_bld, coord);
 768
 769       if (bld->static_state->normalized_coords) {
 770          /* scale coord to length */
 771          coord = lp_build_mul(coord_bld, coord, length_f);
 772       }
 773
 774       icoord = lp_build_ifloor(coord_bld, coord);
 775
 776       /* clamp to [0, length - 1] */
 777       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 778       break;
 779
 780    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 781       coord = lp_build_abs(coord_bld, coord);
 782
 783       if (bld->static_state->normalized_coords) {
 784          /* scale coord to length */
 785          coord = lp_build_mul(coord_bld, coord, length_f);
 786       }
 787
 788       icoord = lp_build_ifloor(coord_bld, coord);
 789
 790       /* clamp to [0, length] */
 791       icoord = lp_build_min(int_coord_bld, icoord, length);
 792       break;
 793
 794    default:
 795       assert(0);
 796       icoord = NULL;
 797    }
 798
 799    return icoord;
 800 }
 801
 802
 803 /**
 804  * Codegen equivalent for u_minify().
 805  * Return max(1, base_size >> level);
 806  */
 807 static LLVMValueRef
 808 lp_build_minify(struct lp_build_sample_context *bld,
 809                 LLVMValueRef base_size,
 810                 LLVMValueRef level)
 811 {
 812    LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
 813    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 814    return size;
 815 }
 816
 817
 818 /**
 819  * Generate code to compute texture level of detail (lambda).
 820  * \param s  vector of texcoord s values
 821  * \param t  vector of texcoord t values
 822  * \param r  vector of texcoord r values
 823  * \param lod_bias  optional float vector with the shader lod bias
 824  * \param explicit_lod  optional float vector with the explicit lod
 825  * \param width  scalar int texture width
 826  * \param height  scalar int texture height
 827  * \param depth  scalar int texture depth
 828  *
 829  * XXX: The resulting lod is scalar, so ignore all but the first element of
 830  * derivatives, lod_bias, etc that are passed by the shader.
 831  */
 832 static LLVMValueRef
 833 lp_build_lod_selector(struct lp_build_sample_context *bld,
 834                       LLVMValueRef s,
 835                       LLVMValueRef t,
 836                       LLVMValueRef r,
 837                       const LLVMValueRef *ddx,
 838                       const LLVMValueRef *ddy,
 839                       LLVMValueRef lod_bias, /* optional */
 840                       LLVMValueRef explicit_lod, /* optional */
 841                       LLVMValueRef width,
 842                       LLVMValueRef height,
 843                       LLVMValueRef depth)
 844
 845 {
 846    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 847       /* User is forcing sampling from a particular mipmap level.
 848        * This is hit during mipmap generation.
 849        */
 850       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 851    }
 852    else {
 853       struct lp_build_context *float_bld = &bld->float_bld;
 854       LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
 855                                                     bld->static_state->lod_bias);
 856       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 857                                            bld->static_state->min_lod);
 858       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 859                                            bld->static_state->max_lod);
 860       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 861       LLVMValueRef lod;
 862
 863       if (explicit_lod) {
 864          lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
 865                                        index0, "");
 866       }
 867       else {
 868          const int dims = texture_dims(bld->static_state->target);
 869          LLVMValueRef dsdx, dsdy;
 870          LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
 871          LLVMValueRef rho;
 872
 873          /*
 874           * dsdx = abs(s[1] - s[0]);
 875           * dsdy = abs(s[2] - s[0]);
 876           * dtdx = abs(t[1] - t[0]);
 877           * dtdy = abs(t[2] - t[0]);
 878           * drdx = abs(r[1] - r[0]);
 879           * drdy = abs(r[2] - r[0]);
 880           */
 881          dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
 882          dsdx = lp_build_abs(float_bld, dsdx);
 883          dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
 884          dsdy = lp_build_abs(float_bld, dsdy);
 885          if (dims > 1) {
 886             dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
 887             dtdx = lp_build_abs(float_bld, dtdx);
 888             dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
 889             dtdy = lp_build_abs(float_bld, dtdy);
 890             if (dims > 2) {
 891                drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
 892                drdx = lp_build_abs(float_bld, drdx);
 893                drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
 894                drdy = lp_build_abs(float_bld, drdy);
 895             }
 896          }
 897
 898          /* Compute rho = max of all partial derivatives scaled by texture size.
 899           * XXX this could be vectorized somewhat
 900           */
 901          rho = LLVMBuildMul(bld->builder,
 902                             lp_build_max(float_bld, dsdx, dsdy),
 903                             lp_build_int_to_float(float_bld, width), "");
 904          if (dims > 1) {
 905             LLVMValueRef max;
 906             max = LLVMBuildMul(bld->builder,
 907                                lp_build_max(float_bld, dtdx, dtdy),
 908                                lp_build_int_to_float(float_bld, height), "");
 909             rho = lp_build_max(float_bld, rho, max);
 910             if (dims > 2) {
 911                max = LLVMBuildMul(bld->builder,
 912                                   lp_build_max(float_bld, drdx, drdy),
 913                                   lp_build_int_to_float(float_bld, depth), "");
 914                rho = lp_build_max(float_bld, rho, max);
 915             }
 916          }
 917
 918          /* compute lod = log2(rho) */
 919          lod = lp_build_log2(float_bld, rho);
 920
 921          /* add shader lod bias */
 922          if (lod_bias) {
 923             lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
 924                                                index0, "");
 925             lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
 926          }
 927       }
 928
 929       /* add sampler lod bias */
 930       lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 931
 932       /* clamp lod */
 933       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 934
 935       return lod;
 936    }
 937 }
 938
 939
 940 /**
 941  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 942  * mipmap level index.
 943  * Note: this is all scalar code.
 944  * \param lod  scalar float texture level of detail
 945  * \param level_out  returns integer
 946  */
 947 static void
 948 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 949                            unsigned unit,
 950                            LLVMValueRef lod,
 951                            LLVMValueRef *level_out)
 952 {
 953    struct lp_build_context *float_bld = &bld->float_bld;
 954    struct lp_build_context *int_bld = &bld->int_bld;
 955    LLVMValueRef last_level, level;
 956
 957    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
 958
 959    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 960                                                bld->builder, unit);
 961
 962    /* convert float lod to integer */
 963    level = lp_build_iround(float_bld, lod);
 964
 965    /* clamp level to legal range of levels */
 966    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 967 }
 968
 969
 970 /**
 971  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 972  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 973  * two mipmap levels and interpolate between them.
 974  */
 975 static void
 976 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 977                            unsigned unit,
 978                            LLVMValueRef lod,
 979                            LLVMValueRef *level0_out,
 980                            LLVMValueRef *level1_out,
 981                            LLVMValueRef *weight_out)
 982 {
 983    struct lp_build_context *float_bld = &bld->float_bld;
 984    struct lp_build_context *int_bld = &bld->int_bld;
 985    LLVMValueRef last_level, level;
 986
 987    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 988                                                bld->builder, unit);
 989
 990    /* convert float lod to integer */
 991    level = lp_build_ifloor(float_bld, lod);
 992
 993    /* compute level 0 and clamp to legal range of levels */
 994    *level0_out = lp_build_clamp(int_bld, level,
 995                                 int_bld->zero,
 996                                 last_level);
 997    /* compute level 1 and clamp to legal range of levels */
 998    level = lp_build_add(int_bld, level, int_bld->one);
 999    *level1_out = lp_build_clamp(int_bld, level,
1000                                 int_bld->zero,
1001                                 last_level);
1002
1003    *weight_out = lp_build_fract(float_bld, lod);
1004 }
1005
1006
1007 /**
1008  * Generate code to sample a mipmap level with nearest filtering.
1009  * If sampling a cube texture, r = cube face in [0,5].
1010  */
1011 static void
1012 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1013                               LLVMValueRef width_vec,
1014                               LLVMValueRef height_vec,
1015                               LLVMValueRef depth_vec,
1016                               LLVMValueRef row_stride_vec,
1017                               LLVMValueRef img_stride_vec,
1018                               LLVMValueRef data_ptr,
1019                               LLVMValueRef s,
1020                               LLVMValueRef t,
1021                               LLVMValueRef r,
1022                               LLVMValueRef colors_out[4])
1023 {
1024    const int dims = texture_dims(bld->static_state->target);
1025    LLVMValueRef x, y, z;
1026
1027    /*
1028     * Compute integer texcoords.
1029     */
1030    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1031                                     bld->static_state->pot_width,
1032                                     bld->static_state->wrap_s);
1033    lp_build_name(x, "tex.x.wrapped");
1034
1035    if (dims >= 2) {
1036       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1037                                        bld->static_state->pot_height,
1038                                        bld->static_state->wrap_t);
1039       lp_build_name(y, "tex.y.wrapped");
1040
1041       if (dims == 3) {
1042          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1043                                           bld->static_state->pot_height,
1044                                           bld->static_state->wrap_r);
1045          lp_build_name(z, "tex.z.wrapped");
1046       }
1047       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1048          z = r;
1049       }
1050       else {
1051          z = NULL;
1052       }
1053    }
1054    else {
1055       y = z = NULL;
1056    }
1057
1058    /*
1059     * Get texture colors.
1060     */
1061    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1062                              x, y, z,
1063                              row_stride_vec, img_stride_vec,
1064                              data_ptr, colors_out);
1065 }
1066
1067
1068 /**
1069  * Generate code to sample a mipmap level with linear filtering.
1070  * If sampling a cube texture, r = cube face in [0,5].
1071  */
1072 static void
1073 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1074                              LLVMValueRef width_vec,
1075                              LLVMValueRef height_vec,
1076                              LLVMValueRef depth_vec,
1077                              LLVMValueRef row_stride_vec,
1078                              LLVMValueRef img_stride_vec,
1079                              LLVMValueRef data_ptr,
1080                              LLVMValueRef s,
1081                              LLVMValueRef t,
1082                              LLVMValueRef r,
1083                              LLVMValueRef colors_out[4])
1084 {
1085    const int dims = texture_dims(bld->static_state->target);
1086    LLVMValueRef x0, y0, z0, x1, y1, z1;
1087    LLVMValueRef s_fpart, t_fpart, r_fpart;
1088    LLVMValueRef neighbors[2][2][4];
1089    int chan;
1090
1091    /*
1092     * Compute integer texcoords.
1093     */
1094    lp_build_sample_wrap_linear(bld, s, width_vec,
1095                                bld->static_state->pot_width,
1096                                bld->static_state->wrap_s,
1097                                &x0, &x1, &s_fpart);
1098    lp_build_name(x0, "tex.x0.wrapped");
1099    lp_build_name(x1, "tex.x1.wrapped");
1100
1101    if (dims >= 2) {
1102       lp_build_sample_wrap_linear(bld, t, height_vec,
1103                                   bld->static_state->pot_height,
1104                                   bld->static_state->wrap_t,
1105                                   &y0, &y1, &t_fpart);
1106       lp_build_name(y0, "tex.y0.wrapped");
1107       lp_build_name(y1, "tex.y1.wrapped");
1108
1109       if (dims == 3) {
1110          lp_build_sample_wrap_linear(bld, r, depth_vec,
1111                                      bld->static_state->pot_depth,
1112                                      bld->static_state->wrap_r,
1113                                      &z0, &z1, &r_fpart);
1114          lp_build_name(z0, "tex.z0.wrapped");
1115          lp_build_name(z1, "tex.z1.wrapped");
1116       }
1117       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1118          z0 = z1 = r;  /* cube face */
1119          r_fpart = NULL;
1120       }
1121       else {
1122          z0 = z1 = NULL;
1123          r_fpart = NULL;
1124       }
1125    }
1126    else {
1127       y0 = y1 = t_fpart = NULL;
1128       z0 = z1 = r_fpart = NULL;
1129    }
1130
1131    /*
1132     * Get texture colors.
1133     */
1134    /* get x0/x1 texels */
1135    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1136                              x0, y0, z0,
1137                              row_stride_vec, img_stride_vec,
1138                              data_ptr, neighbors[0][0]);
1139    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1140                              x1, y0, z0,
1141                              row_stride_vec, img_stride_vec,
1142                              data_ptr, neighbors[0][1]);
1143
1144    if (dims == 1) {
1145       /* Interpolate two samples from 1D image to produce one color */
1146       for (chan = 0; chan < 4; chan++) {
1147          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1148                                           neighbors[0][0][chan],
1149                                           neighbors[0][1][chan]);
1150       }
1151    }
1152    else {
1153       /* 2D/3D texture */
1154       LLVMValueRef colors0[4];
1155
1156       /* get x0/x1 texels at y1 */
1157       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1158                                 x0, y1, z0,
1159                                 row_stride_vec, img_stride_vec,
1160                                 data_ptr, neighbors[1][0]);
1161       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1162                                 x1, y1, z0,
1163                                 row_stride_vec, img_stride_vec,
1164                                 data_ptr, neighbors[1][1]);
1165
1166       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1167       for (chan = 0; chan < 4; chan++) {
1168          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1169                                           s_fpart, t_fpart,
1170                                           neighbors[0][0][chan],
1171                                           neighbors[0][1][chan],
1172                                           neighbors[1][0][chan],
1173                                           neighbors[1][1][chan]);
1174       }
1175
1176       if (dims == 3) {
1177          LLVMValueRef neighbors1[2][2][4];
1178          LLVMValueRef colors1[4];
1179
1180          /* get x0/x1/y0/y1 texels at z1 */
1181          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1182                                    x0, y0, z1,
1183                                    row_stride_vec, img_stride_vec,
1184                                    data_ptr, neighbors1[0][0]);
1185          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1186                                    x1, y0, z1,
1187                                    row_stride_vec, img_stride_vec,
1188                                    data_ptr, neighbors1[0][1]);
1189          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1190                                    x0, y1, z1,
1191                                    row_stride_vec, img_stride_vec,
1192                                    data_ptr, neighbors1[1][0]);
1193          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1194                                    x1, y1, z1,
1195                                    row_stride_vec, img_stride_vec,
1196                                    data_ptr, neighbors1[1][1]);
1197
1198          /* Bilinear interpolate the four samples from the second Z slice */
1199          for (chan = 0; chan < 4; chan++) {
1200             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1201                                              s_fpart, t_fpart,
1202                                              neighbors1[0][0][chan],
1203                                              neighbors1[0][1][chan],
1204                                              neighbors1[1][0][chan],
1205                                              neighbors1[1][1][chan]);
1206          }
1207
1208          /* Linearly interpolate the two samples from the two 3D slices */
1209          for (chan = 0; chan < 4; chan++) {
1210             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1211                                              r_fpart,
1212                                              colors0[chan], colors1[chan]);
1213          }
1214       }
1215       else {
1216          /* 2D tex */
1217          for (chan = 0; chan < 4; chan++) {
1218             colors_out[chan] = colors0[chan];
1219          }
1220       }
1221    }
1222 }
1223
1224
1225 /** Helper used by lp_build_cube_lookup() */
1226 static LLVMValueRef
1227 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1228 {
1229    /* ima = -0.5 / abs(coord); */
1230    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1231    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1232    LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
1233                                    lp_build_rcp(coord_bld, absCoord));
1234    return ima;
1235 }
1236
1237
1238 /**
1239  * Helper used by lp_build_cube_lookup()
1240  * \param sign  scalar +1 or -1
1241  * \param coord  float vector
1242  * \param ima  float vector
1243  */
1244 static LLVMValueRef
1245 lp_build_cube_coord(struct lp_build_context *coord_bld,
1246                     LLVMValueRef sign, int negate_coord,
1247                     LLVMValueRef coord, LLVMValueRef ima)
1248 {
1249    /* return negate(coord) * ima * sign + 0.5; */
1250    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1251    LLVMValueRef res;
1252
1253    assert(negate_coord == +1 || negate_coord == -1);
1254
1255    if (negate_coord == -1) {
1256       coord = lp_build_negate(coord_bld, coord);
1257    }
1258
1259    res = lp_build_mul(coord_bld, coord, ima);
1260    if (sign) {
1261       sign = lp_build_broadcast_scalar(coord_bld, sign);
1262       res = lp_build_mul(coord_bld, res, sign);
1263    }
1264    res = lp_build_add(coord_bld, res, half);
1265
1266    return res;
1267 }
1268
1269
1270 /** Helper used by lp_build_cube_lookup()
1271  * Return (major_coord >= 0) ? pos_face : neg_face;
1272  */
1273 static LLVMValueRef
1274 lp_build_cube_face(struct lp_build_sample_context *bld,
1275                    LLVMValueRef major_coord,
1276                    unsigned pos_face, unsigned neg_face)
1277 {
1278    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1279                                     major_coord,
1280                                     bld->float_bld.zero, "");
1281    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1282    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1283    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1284    return res;
1285 }
1286
1287
1288
1289 /**
1290  * Generate code to do cube face selection and per-face texcoords.
1291  */
1292 static void
1293 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1294                      LLVMValueRef s,
1295                      LLVMValueRef t,
1296                      LLVMValueRef r,
1297                      LLVMValueRef *face,
1298                      LLVMValueRef *face_s,
1299                      LLVMValueRef *face_t)
1300 {
1301    struct lp_build_context *float_bld = &bld->float_bld;
1302    struct lp_build_context *coord_bld = &bld->coord_bld;
1303    LLVMValueRef rx, ry, rz;
1304    LLVMValueRef arx, ary, arz;
1305    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1306    LLVMValueRef arx_ge_ary, arx_ge_arz;
1307    LLVMValueRef ary_ge_arx, ary_ge_arz;
1308    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1309    LLVMValueRef rx_pos, ry_pos, rz_pos;
1310
1311    assert(bld->coord_bld.type.length == 4);
1312
1313    /*
1314     * Use the average of the four pixel's texcoords to choose the face.
1315     */
1316    rx = lp_build_mul(float_bld, c25,
1317                      lp_build_sum_vector(&bld->coord_bld, s));
1318    ry = lp_build_mul(float_bld, c25,
1319                      lp_build_sum_vector(&bld->coord_bld, t));
1320    rz = lp_build_mul(float_bld, c25,
1321                      lp_build_sum_vector(&bld->coord_bld, r));
1322
1323    arx = lp_build_abs(float_bld, rx);
1324    ary = lp_build_abs(float_bld, ry);
1325    arz = lp_build_abs(float_bld, rz);
1326
1327    /*
1328     * Compare sign/magnitude of rx,ry,rz to determine face
1329     */
1330    arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1331    arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1332    ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1333    ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1334
1335    arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1336    ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1337
1338    rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1339    ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1340    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1341
1342    {
1343       struct lp_build_flow_context *flow_ctx;
1344       struct lp_build_if_state if_ctx;
1345
1346       flow_ctx = lp_build_flow_create(bld->builder);
1347       lp_build_flow_scope_begin(flow_ctx);
1348
1349       *face_s = bld->coord_bld.undef;
1350       *face_t = bld->coord_bld.undef;
1351       *face = bld->int_bld.undef;
1352
1353       lp_build_name(*face_s, "face_s");
1354       lp_build_name(*face_t, "face_t");
1355       lp_build_name(*face, "face");
1356
1357       lp_build_flow_scope_declare(flow_ctx, face_s);
1358       lp_build_flow_scope_declare(flow_ctx, face_t);
1359       lp_build_flow_scope_declare(flow_ctx, face);
1360
1361       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1362       {
1363          /* +/- X face */
1364          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1365          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1366          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1367          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1368          *face = lp_build_cube_face(bld, rx,
1369                                     PIPE_TEX_FACE_POS_X,
1370                                     PIPE_TEX_FACE_NEG_X);
1371       }
1372       lp_build_else(&if_ctx);
1373       {
1374          struct lp_build_flow_context *flow_ctx2;
1375          struct lp_build_if_state if_ctx2;
1376
1377          LLVMValueRef face_s2 = bld->coord_bld.undef;
1378          LLVMValueRef face_t2 = bld->coord_bld.undef;
1379          LLVMValueRef face2 = bld->int_bld.undef;
1380
1381          flow_ctx2 = lp_build_flow_create(bld->builder);
1382          lp_build_flow_scope_begin(flow_ctx2);
1383          lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1384          lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1385          lp_build_flow_scope_declare(flow_ctx2, &face2);
1386
1387          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1388
1389          lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1390          {
1391             /* +/- Y face */
1392             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1393             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1394             face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1395             face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1396             face2 = lp_build_cube_face(bld, ry,
1397                                        PIPE_TEX_FACE_POS_Y,
1398                                        PIPE_TEX_FACE_NEG_Y);
1399          }
1400          lp_build_else(&if_ctx2);
1401          {
1402             /* +/- Z face */
1403             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1404             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1405             face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1406             face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1407             face2 = lp_build_cube_face(bld, rz,
1408                                        PIPE_TEX_FACE_POS_Z,
1409                                        PIPE_TEX_FACE_NEG_Z);
1410          }
1411          lp_build_endif(&if_ctx2);
1412          lp_build_flow_scope_end(flow_ctx2);
1413          lp_build_flow_destroy(flow_ctx2);
1414
1415          *face_s = face_s2;
1416          *face_t = face_t2;
1417          *face = face2;
1418       }
1419
1420       lp_build_endif(&if_ctx);
1421       lp_build_flow_scope_end(flow_ctx);
1422       lp_build_flow_destroy(flow_ctx);
1423    }
1424 }
1425
1426
1427
1428 /**
1429  * Sample the texture/mipmap using given image filter and mip filter.
1430  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1431  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1432  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1433  */
1434 static void
1435 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1436                        unsigned img_filter,
1437                        unsigned mip_filter,
1438                        LLVMValueRef s,
1439                        LLVMValueRef t,
1440                        LLVMValueRef r,
1441                        LLVMValueRef lod_fpart,
1442                        LLVMValueRef width0_vec,
1443                        LLVMValueRef width1_vec,
1444                        LLVMValueRef height0_vec,
1445                        LLVMValueRef height1_vec,
1446                        LLVMValueRef depth0_vec,
1447                        LLVMValueRef depth1_vec,
1448                        LLVMValueRef row_stride0_vec,
1449                        LLVMValueRef row_stride1_vec,
1450                        LLVMValueRef img_stride0_vec,
1451                        LLVMValueRef img_stride1_vec,
1452                        LLVMValueRef data_ptr0,
1453                        LLVMValueRef data_ptr1,
1454                        LLVMValueRef *colors_out)
1455 {
1456    LLVMValueRef colors0[4], colors1[4];
1457    int chan;
1458
1459    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1460       lp_build_sample_image_nearest(bld,
1461                                     width0_vec, height0_vec, depth0_vec,
1462                                     row_stride0_vec, img_stride0_vec,
1463                                     data_ptr0, s, t, r, colors0);
1464
1465       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1466          /* sample the second mipmap level, and interp */
1467          lp_build_sample_image_nearest(bld,
1468                                        width1_vec, height1_vec, depth1_vec,
1469                                        row_stride1_vec, img_stride1_vec,
1470                                        data_ptr1, s, t, r, colors1);
1471       }
1472    }
1473    else {
1474       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1475
1476       lp_build_sample_image_linear(bld,
1477                                    width0_vec, height0_vec, depth0_vec,
1478                                    row_stride0_vec, img_stride0_vec,
1479                                    data_ptr0, s, t, r, colors0);
1480
1481       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1482          /* sample the second mipmap level, and interp */
1483          lp_build_sample_image_linear(bld,
1484                                       width1_vec, height1_vec, depth1_vec,
1485                                       row_stride1_vec, img_stride1_vec,
1486                                       data_ptr1, s, t, r, colors1);
1487       }
1488    }
1489
1490    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1491       /* interpolate samples from the two mipmap levels */
1492       for (chan = 0; chan < 4; chan++) {
1493          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1494                                           colors0[chan], colors1[chan]);
1495       }
1496    }
1497    else {
1498       /* use first/only level's colors */
1499       for (chan = 0; chan < 4; chan++) {
1500          colors_out[chan] = colors0[chan];
1501       }
1502    }
1503 }
1504
1505
1506
1507 /**
1508  * General texture sampling codegen.
1509  * This function handles texture sampling for all texture targets (1D,
1510  * 2D, 3D, cube) and all filtering modes.
1511  */
1512 static void
1513 lp_build_sample_general(struct lp_build_sample_context *bld,
1514                         unsigned unit,
1515                         LLVMValueRef s,
1516                         LLVMValueRef t,
1517                         LLVMValueRef r,
1518                         const LLVMValueRef *ddx,
1519                         const LLVMValueRef *ddy,
1520                         LLVMValueRef lod_bias, /* optional */
1521                         LLVMValueRef explicit_lod, /* optional */
1522                         LLVMValueRef width,
1523                         LLVMValueRef height,
1524                         LLVMValueRef depth,
1525                         LLVMValueRef width_vec,
1526                         LLVMValueRef height_vec,
1527                         LLVMValueRef depth_vec,
1528                         LLVMValueRef row_stride_array,
1529                         LLVMValueRef img_stride_array,
1530                         LLVMValueRef data_array,
1531                         LLVMValueRef *colors_out)
1532 {
1533    struct lp_build_context *float_bld = &bld->float_bld;
1534    const unsigned mip_filter = bld->static_state->min_mip_filter;
1535    const unsigned min_filter = bld->static_state->min_img_filter;
1536    const unsigned mag_filter = bld->static_state->mag_img_filter;
1537    const int dims = texture_dims(bld->static_state->target);
1538    LLVMValueRef lod = NULL, lod_fpart = NULL;
1539    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1540    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1541    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1542    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1543    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1544    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1545
1546    /*
1547    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1548           mip_filter, min_filter, mag_filter);
1549    */
1550
1551    /*
1552     * Compute the level of detail (float).
1553     */
1554    if (min_filter != mag_filter ||
1555        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1556       /* Need to compute lod either to choose mipmap levels or to
1557        * distinguish between minification/magnification with one mipmap level.
1558        */
1559       lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
1560                                   lod_bias, explicit_lod,
1561                                   width, height, depth);
1562    }
1563
1564    /*
1565     * Compute integer mipmap level(s) to fetch texels from.
1566     */
1567    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1568       /* always use mip level 0 */
1569       ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1570    }
1571    else {
1572       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1573          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1574       }
1575       else {
1576          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1577          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1578                                     &lod_fpart);
1579          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1580       }
1581    }
1582
1583    /*
1584     * Convert scalar integer mipmap levels into vectors.
1585     */
1586    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1587    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1588       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1589
1590    /*
1591     * Compute width, height at mipmap level 'ilevel0'
1592     */
1593    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1594    if (dims >= 2) {
1595       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1596       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1597                                                       ilevel0);
1598       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1599          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1600                                                          img_stride_array,
1601                                                          ilevel0);
1602          if (dims == 3) {
1603             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1604          }
1605       }
1606    }
1607    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1608       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1609       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1610       if (dims >= 2) {
1611          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1612          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1613                                                          ilevel1);
1614          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1615             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1616                                                             img_stride_array,
1617                                                             ilevel1);
1618             if (dims ==3) {
1619                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1620             }
1621          }
1622       }
1623    }
1624
1625    /*
1626     * Choose cube face, recompute per-face texcoords.
1627     */
1628    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1629       LLVMValueRef face, face_s, face_t;
1630       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1631       s = face_s; /* vec */
1632       t = face_t; /* vec */
1633       /* use 'r' to indicate cube face */
1634       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1635    }
1636
1637    /*
1638     * Get pointer(s) to image data for mipmap level(s).
1639     */
1640    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1641    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1642       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1643    }
1644
1645    /*
1646     * Get/interpolate texture colors.
1647     */
1648    if (min_filter == mag_filter) {
1649       /* no need to distinquish between minification and magnification */
1650       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1651                              width0_vec, width1_vec,
1652                              height0_vec, height1_vec,
1653                              depth0_vec, depth1_vec,
1654                              row_stride0_vec, row_stride1_vec,
1655                              img_stride0_vec, img_stride1_vec,
1656                              data_ptr0, data_ptr1,
1657                              colors_out);
1658    }
1659    else {
1660       /* Emit conditional to choose min image filter or mag image filter
1661        * depending on the lod being >0 or <= 0, respectively.
1662        */
1663       struct lp_build_flow_context *flow_ctx;
1664       struct lp_build_if_state if_ctx;
1665       LLVMValueRef minify;
1666
1667       flow_ctx = lp_build_flow_create(bld->builder);
1668       lp_build_flow_scope_begin(flow_ctx);
1669
1670       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1671       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1672       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1673       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1674
1675       /* minify = lod > 0.0 */
1676       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1677                              lod, float_bld->zero, "");
1678
1679       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1680       {
1681          /* Use the minification filter */
1682          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1683                                 s, t, r, lod_fpart,
1684                                 width0_vec, width1_vec,
1685                                 height0_vec, height1_vec,
1686                                 depth0_vec, depth1_vec,
1687                                 row_stride0_vec, row_stride1_vec,
1688                                 img_stride0_vec, img_stride1_vec,
1689                                 data_ptr0, data_ptr1,
1690                                 colors_out);
1691       }
1692       lp_build_else(&if_ctx);
1693       {
1694          /* Use the magnification filter */
1695          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1696                                 s, t, r, lod_fpart,
1697                                 width0_vec, width1_vec,
1698                                 height0_vec, height1_vec,
1699                                 depth0_vec, depth1_vec,
1700                                 row_stride0_vec, row_stride1_vec,
1701                                 img_stride0_vec, img_stride1_vec,
1702                                 data_ptr0, data_ptr1,
1703                                 colors_out);
1704       }
1705       lp_build_endif(&if_ctx);
1706
1707       lp_build_flow_scope_end(flow_ctx);
1708       lp_build_flow_destroy(flow_ctx);
1709    }
1710 }
1711
1712
1713
1714 static void
1715 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1716                           struct lp_type dst_type,
1717                           LLVMValueRef packed,
1718                           LLVMValueRef *rgba)
1719 {
1720    LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
1721    unsigned chan;
1722
1723    /* Decode the input vector components */
1724    for (chan = 0; chan < 4; ++chan) {
1725       unsigned start = chan*8;
1726       unsigned stop = start + 8;
1727       LLVMValueRef input;
1728
1729       input = packed;
1730
1731       if(start)
1732          input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
1733
1734       if(stop < 32)
1735          input = LLVMBuildAnd(builder, input, mask, "");
1736
1737       input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
1738
1739       rgba[chan] = input;
1740    }
1741 }
1742
1743
1744 static void
1745 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1746                               LLVMValueRef s,
1747                               LLVMValueRef t,
1748                               LLVMValueRef width,
1749                               LLVMValueRef height,
1750                               LLVMValueRef stride_array,
1751                               LLVMValueRef data_array,
1752                               LLVMValueRef texel_out[4])
1753 {
1754    LLVMBuilderRef builder = bld->builder;
1755    struct lp_build_context i32, h16, u8n;
1756    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1757    LLVMValueRef i32_c8, i32_c128, i32_c255;
1758    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1759    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1760    LLVMValueRef x0, x1;
1761    LLVMValueRef y0, y1;
1762    LLVMValueRef neighbors[2][2];
1763    LLVMValueRef neighbors_lo[2][2];
1764    LLVMValueRef neighbors_hi[2][2];
1765    LLVMValueRef packed, packed_lo, packed_hi;
1766    LLVMValueRef unswizzled[4];
1767    LLVMValueRef stride;
1768
1769    assert(bld->static_state->target == PIPE_TEXTURE_2D);
1770    assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1771    assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1772    assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1773
1774    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1775    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1776    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1777
1778    i32_vec_type = lp_build_vec_type(i32.type);
1779    h16_vec_type = lp_build_vec_type(h16.type);
1780    u8n_vec_type = lp_build_vec_type(u8n.type);
1781
1782    if (bld->static_state->normalized_coords) {
1783       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1784       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1785       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1786       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1787       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1788    }
1789
1790    /* scale coords by 256 (8 fractional bits) */
1791    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1792    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1793
1794    /* convert float to int */
1795    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1796    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1797
1798    /* subtract 0.5 (add -128) */
1799    i32_c128 = lp_build_const_int_vec(i32.type, -128);
1800    s = LLVMBuildAdd(builder, s, i32_c128, "");
1801    t = LLVMBuildAdd(builder, t, i32_c128, "");
1802
1803    /* compute floor (shift right 8) */
1804    i32_c8 = lp_build_const_int_vec(i32.type, 8);
1805    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1806    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1807
1808    /* compute fractional part (AND with 0xff) */
1809    i32_c255 = lp_build_const_int_vec(i32.type, 255);
1810    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1811    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1812
1813    x0 = s_ipart;
1814    y0 = t_ipart;
1815
1816    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1817    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1818
1819    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1820                                  bld->static_state->wrap_s);
1821    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1822                                  bld->static_state->wrap_t);
1823
1824    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1825                                  bld->static_state->wrap_s);
1826    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1827                                  bld->static_state->wrap_t);
1828
1829    /*
1830     * Transform 4 x i32 in
1831     *
1832     *   s_fpart = {s0, s1, s2, s3}
1833     *
1834     * into 8 x i16
1835     *
1836     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1837     *
1838     * into two 8 x i16
1839     *
1840     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1841     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1842     *
1843     * and likewise for t_fpart. There is no risk of loosing precision here
1844     * since the fractional parts only use the lower 8bits.
1845     */
1846
1847    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1848    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1849
1850    {
1851       LLVMTypeRef elem_type = LLVMInt32Type();
1852       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1853       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1854       LLVMValueRef shuffle_lo;
1855       LLVMValueRef shuffle_hi;
1856       unsigned i, j;
1857
1858       for(j = 0; j < h16.type.length; j += 4) {
1859          unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1860          LLVMValueRef index;
1861
1862          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1863          for(i = 0; i < 4; ++i)
1864             shuffles_lo[j + i] = index;
1865
1866          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1867          for(i = 0; i < 4; ++i)
1868             shuffles_hi[j + i] = index;
1869       }
1870
1871       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1872       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1873
1874       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1875       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1876       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1877       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1878    }
1879
1880    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1881
1882    /*
1883     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1884     *
1885     *   rgba0 rgba1 rgba2 rgba3
1886     *
1887     * bit cast them into 16 x u8
1888     *
1889     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1890     *
1891     * unpack them into two 8 x i16:
1892     *
1893     *   r0 g0 b0 a0 r1 g1 b1 a1
1894     *   r2 g2 b2 a2 r3 g3 b3 a3
1895     *
1896     * The higher 8 bits of the resulting elements will be zero.
1897     */
1898
1899    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1900    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1901    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1902    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1903
1904    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1905    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1906    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1907    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1908
1909    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1910    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1911    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1912    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1913
1914    /*
1915     * Linear interpolate with 8.8 fixed point.
1916     */
1917
1918    packed_lo = lp_build_lerp_2d(&h16,
1919                                 s_fpart_lo, t_fpart_lo,
1920                                 neighbors_lo[0][0],
1921                                 neighbors_lo[0][1],
1922                                 neighbors_lo[1][0],
1923                                 neighbors_lo[1][1]);
1924
1925    packed_hi = lp_build_lerp_2d(&h16,
1926                                 s_fpart_hi, t_fpart_hi,
1927                                 neighbors_hi[0][0],
1928                                 neighbors_hi[0][1],
1929                                 neighbors_hi[1][0],
1930                                 neighbors_hi[1][1]);
1931
1932    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1933
1934    /*
1935     * Convert to SoA and swizzle.
1936     */
1937
1938    packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
1939
1940    lp_build_rgba8_to_f32_soa(bld->builder,
1941                              bld->texel_type,
1942                              packed, unswizzled);
1943
1944    lp_build_format_swizzle_soa(bld->format_desc,
1945                                &bld->texel_bld,
1946                                unswizzled, texel_out);
1947
1948    apply_sampler_swizzle(bld, texel_out);
1949 }
1950
1951
1952 static void
1953 lp_build_sample_compare(struct lp_build_sample_context *bld,
1954                         LLVMValueRef p,
1955                         LLVMValueRef texel[4])
1956 {
1957    struct lp_build_context *texel_bld = &bld->texel_bld;
1958    LLVMValueRef res;
1959    unsigned chan;
1960
1961    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1962       return;
1963
1964    /* TODO: Compare before swizzling, to avoid redundant computations */
1965    res = NULL;
1966    for(chan = 0; chan < 4; ++chan) {
1967       LLVMValueRef cmp;
1968       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1969       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1970
1971       if(res)
1972          res = lp_build_add(texel_bld, res, cmp);
1973       else
1974          res = cmp;
1975    }
1976
1977    assert(res);
1978    res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1979
1980    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1981    for(chan = 0; chan < 3; ++chan)
1982       texel[chan] = res;
1983    texel[3] = texel_bld->one;
1984 }
1985
1986
1987 /**
1988  * Just set texels to white instead of actually sampling the texture.
1989  * For debugging.
1990  */
1991 static void
1992 lp_build_sample_nop(struct lp_build_sample_context *bld,
1993                     LLVMValueRef texel_out[4])
1994 {
1995    struct lp_build_context *texel_bld = &bld->texel_bld;
1996    unsigned chan;
1997
1998    for (chan = 0; chan < 4; chan++) {
1999       /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
2000       texel_out[chan] = texel_bld->one;
2001    }
2002 }
2003
2004
2005 /**
2006  * Build texture sampling code.
2007  * 'texel' will return a vector of four LLVMValueRefs corresponding to
2008  * R, G, B, A.
2009  * \param type  vector float type to use for coords, etc.
2010  */
2011 void
2012 lp_build_sample_soa(LLVMBuilderRef builder,
2013                     const struct lp_sampler_static_state *static_state,
2014                     struct lp_sampler_dynamic_state *dynamic_state,
2015                     struct lp_type type,
2016                     unsigned unit,
2017                     unsigned num_coords,
2018                     const LLVMValueRef *coords,
2019                     const LLVMValueRef *ddx,
2020                     const LLVMValueRef *ddy,
2021                     LLVMValueRef lod_bias, /* optional */
2022                     LLVMValueRef explicit_lod, /* optional */
2023                     LLVMValueRef texel_out[4])
2024 {
2025    struct lp_build_sample_context bld;
2026    LLVMValueRef width, width_vec;
2027    LLVMValueRef height, height_vec;
2028    LLVMValueRef depth, depth_vec;
2029    LLVMValueRef row_stride_array, img_stride_array;
2030    LLVMValueRef data_array;
2031    LLVMValueRef s;
2032    LLVMValueRef t;
2033    LLVMValueRef r;
2034
2035    if (0) {
2036       enum pipe_format fmt = static_state->format;
2037       debug_printf("Sample from %s\n", util_format_name(fmt));
2038    }
2039
2040    /* Setup our build context */
2041    memset(&bld, 0, sizeof bld);
2042    bld.builder = builder;
2043    bld.static_state = static_state;
2044    bld.dynamic_state = dynamic_state;
2045    bld.format_desc = util_format_description(static_state->format);
2046
2047    bld.float_type = lp_type_float(32);
2048    bld.int_type = lp_type_int(32);
2049    bld.coord_type = type;
2050    bld.uint_coord_type = lp_uint_type(type);
2051    bld.int_coord_type = lp_int_type(type);
2052    bld.texel_type = type;
2053
2054    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2055    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2056    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2057    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2058    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2059    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2060
2061    /* Get the dynamic state */
2062    width = dynamic_state->width(dynamic_state, builder, unit);
2063    height = dynamic_state->height(dynamic_state, builder, unit);
2064    depth = dynamic_state->depth(dynamic_state, builder, unit);
2065    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2066    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2067    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2068    /* Note that data_array is an array[level] of pointers to texture images */
2069
2070    s = coords[0];
2071    t = coords[1];
2072    r = coords[2];
2073
2074    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2075    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2076    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2077
2078    if (0) {
2079       /* For debug: no-op texture sampling */
2080       lp_build_sample_nop(&bld, texel_out);
2081    }
2082    else if (util_format_is_rgba8_variant(bld.format_desc) &&
2083             static_state->target == PIPE_TEXTURE_2D &&
2084             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2085             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2086             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2087             is_simple_wrap_mode(static_state->wrap_s) &&
2088             is_simple_wrap_mode(static_state->wrap_t)) {
2089       /* special case */
2090       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2091                                     row_stride_array, data_array, texel_out);
2092    }
2093    else {
2094       lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2095                               lod_bias, explicit_lod,
2096                               width, height, depth,
2097                               width_vec, height_vec, depth_vec,
2098                               row_stride_array, img_stride_array,
2099                               data_array,
2100                               texel_out);
2101    }
2102
2103    lp_build_sample_compare(&bld, r, texel_out);
2104 }