src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- SoA.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_logic.h"
  49 #include "lp_bld_swizzle.h"
  50 #include "lp_bld_pack.h"
  51 #include "lp_bld_flow.h"
  52 #include "lp_bld_gather.h"
  53 #include "lp_bld_format.h"
  54 #include "lp_bld_sample.h"
  55 #include "lp_bld_quad.h"
  56
  57
/**
 * Keep all information for sampling code generation in a single place.
 *
 * Groups the LLVM builder, the sampler state and, for every kind of value
 * the sampling code works on, an lp_type together with the matching
 * lp_build_context.
 */
struct lp_build_sample_context
{
   /** LLVM builder that all sampling instructions are emitted through */
   LLVMBuilderRef builder;

   /** Sampler state inspected while the code is being generated */
   const struct lp_sampler_static_state *static_state;

   /* NOTE(review): presumably state that is only known at run time -- confirm */
   struct lp_sampler_dynamic_state *dynamic_state;

   /** Format description handed to the texel offset/fetch helpers */
   const struct util_format_description *format_desc;

   /** regular scalar float type */
   struct lp_type float_type;
   struct lp_build_context float_bld;

   /** regular scalar int type */
   struct lp_type int_type;
   struct lp_build_context int_bld;

   /** Incoming coordinates type and build context */
   struct lp_type coord_type;
   struct lp_build_context coord_bld;

   /** Unsigned integer coordinates */
   struct lp_type uint_coord_type;
   struct lp_build_context uint_coord_bld;

   /** Signed integer coordinates */
   struct lp_type int_coord_type;
   struct lp_build_context int_coord_bld;

   /** Output texels type and build context */
   struct lp_type texel_type;
   struct lp_build_context texel_bld;
};
  95
  96
  97 /**
  98  * Does the given texture wrap mode allow sampling the texture border color?
  99  * XXX maybe move this into gallium util code.
 100  */
 101 static boolean
 102 wrap_mode_uses_border_color(unsigned mode)
 103 {
 104    switch (mode) {
 105    case PIPE_TEX_WRAP_REPEAT:
 106    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 107    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 108    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 109       return FALSE;
 110    case PIPE_TEX_WRAP_CLAMP:
 111    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 113    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 114       return TRUE;
 115    default:
 116       assert(0 && "unexpected wrap mode");
 117       return FALSE;
 118    }
 119 }
 120
 121
 122 static LLVMValueRef
 123 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
 124                           LLVMValueRef data_array, LLVMValueRef level)
 125 {
 126    LLVMValueRef indexes[2], data_ptr;
 127    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 128    indexes[1] = level;
 129    data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
 130    data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
 131    return data_ptr;
 132 }
 133
 134
 135 static LLVMValueRef
 136 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
 137                                 LLVMValueRef data_array, int level)
 138 {
 139    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 140    return lp_build_get_mipmap_level(bld, data_array, lvl);
 141 }
 142
 143
 144 /**
 145  * Dereference stride_array[mipmap_level] array to get a stride.
 146  * Return stride as a vector.
 147  */
 148 static LLVMValueRef
 149 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
 150                               LLVMValueRef stride_array, LLVMValueRef level)
 151 {
 152    LLVMValueRef indexes[2], stride;
 153    indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
 154    indexes[1] = level;
 155    stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
 156    stride = LLVMBuildLoad(bld->builder, stride, "");
 157    stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
 158    return stride;
 159 }
 160
 161
 162 /** Dereference stride_array[0] array to get a stride (as vector). */
 163 static LLVMValueRef
 164 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
 165                                     LLVMValueRef stride_array, int level)
 166 {
 167    LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
 168    return lp_build_get_level_stride_vec(bld, stride_array, lvl);
 169 }
 170
 171
 172 static int
 173 texture_dims(enum pipe_texture_target tex)
 174 {
 175    switch (tex) {
 176    case PIPE_TEXTURE_1D:
 177       return 1;
 178    case PIPE_TEXTURE_2D:
 179    case PIPE_TEXTURE_CUBE:
 180       return 2;
 181    case PIPE_TEXTURE_3D:
 182       return 3;
 183    default:
 184       assert(0 && "bad texture target in texture_dims()");
 185       return 2;
 186    }
 187 }
 188
 189
 190 static void
 191 apply_sampler_swizzle(struct lp_build_sample_context *bld,
 192                       LLVMValueRef *texel)
 193 {
 194    unsigned char swizzles[4];
 195
 196    swizzles[0] = bld->static_state->swizzle_r;
 197    swizzles[1] = bld->static_state->swizzle_g;
 198    swizzles[2] = bld->static_state->swizzle_b;
 199    swizzles[3] = bld->static_state->swizzle_a;
 200
 201    lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
 202 }
 203
 204
 205
 206 /**
 207  * Generate code to fetch a texel from a texture at int coords (x, y, z).
 208  * The computation depends on whether the texture is 1D, 2D or 3D.
 209  * The result, texel, will be:
 210  *   texel[0] = red values
 211  *   texel[1] = green values
 212  *   texel[2] = blue values
 213  *   texel[3] = alpha values
 214  */
 215 static void
 216 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 217                           LLVMValueRef width,
 218                           LLVMValueRef height,
 219                           LLVMValueRef depth,
 220                           LLVMValueRef x,
 221                           LLVMValueRef y,
 222                           LLVMValueRef z,
 223                           LLVMValueRef y_stride,
 224                           LLVMValueRef z_stride,
 225                           LLVMValueRef data_ptr,
 226                           LLVMValueRef texel_out[4])
 227 {
 228    const int dims = texture_dims(bld->static_state->target);
 229    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 230    LLVMValueRef offset;
 231    LLVMValueRef i, j;
 232    LLVMValueRef use_border = NULL;
 233
 234    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
 235    if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
 236       LLVMValueRef b1, b2;
 237       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
 238       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
 239       use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 240    }
 241
 242    if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
 243       LLVMValueRef b1, b2;
 244       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
 245       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
 246       if (use_border) {
 247          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 248          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 249       }
 250       else {
 251          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 252       }
 253    }
 254
 255    if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
 256       LLVMValueRef b1, b2;
 257       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
 258       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
 259       if (use_border) {
 260          use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
 261          use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
 262       }
 263       else {
 264          use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
 265       }
 266    }
 267
 268    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 269    lp_build_sample_offset(&bld->uint_coord_bld,
 270                           bld->format_desc,
 271                           x, y, z, y_stride, z_stride,
 272                           &offset, &i, &j);
 273
 274    if (use_border) {
 275       /* If we can sample the border color, it means that texcoords may
 276        * lie outside the bounds of the texture image.  We need to do
 277        * something to prevent reading out of bounds and causing a segfault.
 278        *
 279        * Simply AND the texture coords with !use_border.  This will cause
 280        * coords which are out of bounds to become zero.  Zero's guaranteed
 281        * to be inside the texture image.
 282        */
 283       offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
 284    }
 285
 286    lp_build_fetch_rgba_soa(bld->builder,
 287                            bld->format_desc,
 288                            bld->texel_type,
 289                            data_ptr, offset,
 290                            i, j,
 291                            texel_out);
 292
 293    apply_sampler_swizzle(bld, texel_out);
 294
 295    /*
 296     * Note: if we find an app which frequently samples the texture border
 297     * we might want to implement a true conditional here to avoid sampling
 298     * the texture whenever possible (since that's quite a bit of code).
 299     * Ex:
 300     *   if (use_border) {
 301     *      texel = border_color;
 302     *   }
 303     *   else {
 304     *      texel = sample_texture(coord);
 305     *   }
 306     * As it is now, we always sample the texture, then selectively replace
 307     * the texel color results with the border color.
 308     */
 309
 310    if (use_border) {
 311       /* select texel color or border color depending on use_border */
 312       int chan;
 313       for (chan = 0; chan < 4; chan++) {
 314          LLVMValueRef border_chan =
 315             lp_build_const_vec(bld->texel_type,
 316                                   bld->static_state->border_color[chan]);
 317          texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
 318                                            border_chan, texel_out[chan]);
 319       }
 320    }
 321 }
 322
 323
 324 /**
 325  * Fetch the texels as <4n x i8> in AoS form.
 326  */
 327 static LLVMValueRef
 328 lp_build_sample_packed(struct lp_build_sample_context *bld,
 329                        LLVMValueRef x,
 330                        LLVMValueRef y,
 331                        LLVMValueRef y_stride,
 332                        LLVMValueRef data_array)
 333 {
 334    LLVMValueRef offset, i, j;
 335    LLVMValueRef data_ptr;
 336    LLVMValueRef res;
 337
 338    /* convert x,y,z coords to linear offset from start of texture, in bytes */
 339    lp_build_sample_offset(&bld->uint_coord_bld,
 340                           bld->format_desc,
 341                           x, y, NULL, y_stride, NULL,
 342                           &offset, &i, &j);
 343
 344    /* get pointer to mipmap level 0 data */
 345    data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
 346
 347    if (util_format_is_rgba8_variant(bld->format_desc)) {
 348       /* Just fetch the data directly without swizzling */
 349       assert(bld->format_desc->block.width == 1);
 350       assert(bld->format_desc->block.height == 1);
 351       assert(bld->format_desc->block.bits <= bld->texel_type.width);
 352
 353       res = lp_build_gather(bld->builder,
 354                             bld->texel_type.length,
 355                             bld->format_desc->block.bits,
 356                             bld->texel_type.width,
 357                             data_ptr, offset);
 358    }
 359    else {
 360       struct lp_type type;
 361
 362       assert(bld->texel_type.width == 32);
 363
 364       memset(&type, 0, sizeof type);
 365       type.width = 8;
 366       type.length = bld->texel_type.length*4;
 367       type.norm = TRUE;
 368
 369       res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
 370                                     data_ptr, offset, i, j);
 371    }
 372
 373    return res;
 374 }
 375
 376
 377 /**
 378  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 379  */
 380 static LLVMValueRef
 381 lp_build_coord_mirror(struct lp_build_sample_context *bld,
 382                       LLVMValueRef coord)
 383 {
 384    struct lp_build_context *coord_bld = &bld->coord_bld;
 385    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 386    LLVMValueRef fract, flr, isOdd;
 387
 388    /* fract = coord - floor(coord) */
 389    fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
 390
 391    /* flr = ifloor(coord); */
 392    flr = lp_build_ifloor(coord_bld, coord);
 393
 394    /* isOdd = flr & 1 */
 395    isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
 396
 397    /* make coord positive or negative depending on isOdd */
 398    coord = lp_build_set_sign(coord_bld, fract, isOdd);
 399
 400    /* convert isOdd to float */
 401    isOdd = lp_build_int_to_float(coord_bld, isOdd);
 402
 403    /* add isOdd to coord */
 404    coord = lp_build_add(coord_bld, coord, isOdd);
 405
 406    return coord;
 407 }
 408
 409
 410 /**
 411  * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
 412  * Return whether the given mode is supported by that function.
 413  */
 414 static boolean
 415 is_simple_wrap_mode(unsigned mode)
 416 {
 417    switch (mode) {
 418    case PIPE_TEX_WRAP_REPEAT:
 419    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 420       return TRUE;
 421    default:
 422       return FALSE;
 423    }
 424 }
 425
 426
 427 /**
 428  * Build LLVM code for texture wrap mode, for scaled integer texcoords.
 429  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
 430  * \param length  the texture size along one dimension
 431  * \param is_pot  if TRUE, length is a power of two
 432  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 433  */
 434 static LLVMValueRef
 435 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
 436                          LLVMValueRef coord,
 437                          LLVMValueRef length,
 438                          boolean is_pot,
 439                          unsigned wrap_mode)
 440 {
 441    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 442    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 443    LLVMValueRef length_minus_one;
 444
 445    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 446
 447    switch(wrap_mode) {
 448    case PIPE_TEX_WRAP_REPEAT:
 449       if(is_pot)
 450          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
 451       else
 452          /* Signed remainder won't give the right results for negative
 453           * dividends but unsigned remainder does.*/
 454          coord = LLVMBuildURem(bld->builder, coord, length, "");
 455       break;
 456
 457    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 458       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 459       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 460       break;
 461
 462    case PIPE_TEX_WRAP_CLAMP:
 463    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 464    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 465    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 466    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 467    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 468    default:
 469       assert(0);
 470    }
 471
 472    return coord;
 473 }
 474
 475
 476 /**
 477  * Build LLVM code for texture wrap mode for linear filtering.
 478  * \param x0_out  returns first integer texcoord
 479  * \param x1_out  returns second integer texcoord
 480  * \param weight_out  returns linear interpolation weight
 481  */
 482 static void
 483 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 484                             LLVMValueRef coord,
 485                             LLVMValueRef length,
 486                             boolean is_pot,
 487                             unsigned wrap_mode,
 488                             LLVMValueRef *x0_out,
 489                             LLVMValueRef *x1_out,
 490                             LLVMValueRef *weight_out)
 491 {
 492    struct lp_build_context *coord_bld = &bld->coord_bld;
 493    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 494    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 495    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
 496    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 497    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 498    LLVMValueRef coord0, coord1, weight;
 499
 500    switch(wrap_mode) {
 501    case PIPE_TEX_WRAP_REPEAT:
 502       /* mul by size and subtract 0.5 */
 503       coord = lp_build_mul(coord_bld, coord, length_f);
 504       coord = lp_build_sub(coord_bld, coord, half);
 505       /* convert to int */
 506       coord0 = lp_build_ifloor(coord_bld, coord);
 507       coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
 508       /* compute lerp weight */
 509       weight = lp_build_fract(coord_bld, coord);
 510       /* repeat wrap */
 511       if (is_pot) {
 512          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 513          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
 514       }
 515       else {
 516          /* Signed remainder won't give the right results for negative
 517           * dividends but unsigned remainder does.*/
 518          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 519          coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
 520       }
 521       break;
 522
 523    case PIPE_TEX_WRAP_CLAMP:
 524       if (bld->static_state->normalized_coords) {
 525          /* scale coord to length */
 526          coord = lp_build_mul(coord_bld, coord, length_f);
 527       }
 528
 529       /* clamp to [0, length] */
 530       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
 531
 532       coord = lp_build_sub(coord_bld, coord, half);
 533
 534       weight = lp_build_fract(coord_bld, coord);
 535       coord0 = lp_build_ifloor(coord_bld, coord);
 536       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 537       break;
 538
 539    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 540       if (bld->static_state->normalized_coords) {
 541          /* clamp to [0,1] */
 542          coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
 543          /* mul by tex size and subtract 0.5 */
 544          coord = lp_build_mul(coord_bld, coord, length_f);
 545          coord = lp_build_sub(coord_bld, coord, half);
 546       }
 547       else {
 548          LLVMValueRef min, max;
 549          /* clamp to [0.5, length - 0.5] */
 550          min = half;
 551          max = lp_build_sub(coord_bld, length_f, min);
 552          coord = lp_build_clamp(coord_bld, coord, min, max);
 553       }
 554       /* compute lerp weight */
 555       weight = lp_build_fract(coord_bld, coord);
 556       /* coord0 = floor(coord); */
 557       coord0 = lp_build_ifloor(coord_bld, coord);
 558       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 559       /* coord0 = max(coord0, 0) */
 560       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 561       /* coord1 = min(coord1, length-1) */
 562       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 563       break;
 564
 565    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 566       {
 567          LLVMValueRef min, max;
 568          if (bld->static_state->normalized_coords) {
 569             /* scale coord to length */
 570             coord = lp_build_mul(coord_bld, coord, length_f);
 571          }
 572          /* clamp to [-0.5, length + 0.5] */
 573          min = lp_build_const_vec(coord_bld->type, -0.5F);
 574          max = lp_build_sub(coord_bld, length_f, min);
 575          coord = lp_build_clamp(coord_bld, coord, min, max);
 576          coord = lp_build_sub(coord_bld, coord, half);
 577          /* compute lerp weight */
 578          weight = lp_build_fract(coord_bld, coord);
 579          /* convert to int */
 580          coord0 = lp_build_ifloor(coord_bld, coord);
 581          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 582       }
 583       break;
 584
 585    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 586       /* compute mirror function */
 587       coord = lp_build_coord_mirror(bld, coord);
 588
 589       /* scale coord to length */
 590       coord = lp_build_mul(coord_bld, coord, length_f);
 591       coord = lp_build_sub(coord_bld, coord, half);
 592
 593       /* compute lerp weight */
 594       weight = lp_build_fract(coord_bld, coord);
 595
 596       /* convert to int coords */
 597       coord0 = lp_build_ifloor(coord_bld, coord);
 598       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 599
 600       /* coord0 = max(coord0, 0) */
 601       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
 602       /* coord1 = min(coord1, length-1) */
 603       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
 604       break;
 605
 606    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 607       coord = lp_build_abs(coord_bld, coord);
 608
 609       if (bld->static_state->normalized_coords) {
 610          /* scale coord to length */
 611          coord = lp_build_mul(coord_bld, coord, length_f);
 612       }
 613
 614       /* clamp to [0, length] */
 615       coord = lp_build_min(coord_bld, coord, length_f);
 616
 617       coord = lp_build_sub(coord_bld, coord, half);
 618
 619       weight = lp_build_fract(coord_bld, coord);
 620       coord0 = lp_build_ifloor(coord_bld, coord);
 621       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 622       break;
 623
 624    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 625       {
 626          LLVMValueRef min, max;
 627
 628          coord = lp_build_abs(coord_bld, coord);
 629
 630          if (bld->static_state->normalized_coords) {
 631             /* scale coord to length */
 632             coord = lp_build_mul(coord_bld, coord, length_f);
 633          }
 634
 635          /* clamp to [0.5, length - 0.5] */
 636          min = half;
 637          max = lp_build_sub(coord_bld, length_f, min);
 638          coord = lp_build_clamp(coord_bld, coord, min, max);
 639
 640          coord = lp_build_sub(coord_bld, coord, half);
 641
 642          weight = lp_build_fract(coord_bld, coord);
 643          coord0 = lp_build_ifloor(coord_bld, coord);
 644          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 645       }
 646       break;
 647
 648    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 649       {
 650          LLVMValueRef min, max;
 651
 652          coord = lp_build_abs(coord_bld, coord);
 653
 654          if (bld->static_state->normalized_coords) {
 655             /* scale coord to length */
 656             coord = lp_build_mul(coord_bld, coord, length_f);
 657          }
 658
 659          /* clamp to [-0.5, length + 0.5] */
 660          min = lp_build_negate(coord_bld, half);
 661          max = lp_build_sub(coord_bld, length_f, min);
 662          coord = lp_build_clamp(coord_bld, coord, min, max);
 663
 664          coord = lp_build_sub(coord_bld, coord, half);
 665
 666          weight = lp_build_fract(coord_bld, coord);
 667          coord0 = lp_build_ifloor(coord_bld, coord);
 668          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 669       }
 670       break;
 671
 672    default:
 673       assert(0);
 674       coord0 = NULL;
 675       coord1 = NULL;
 676       weight = NULL;
 677    }
 678
 679    *x0_out = coord0;
 680    *x1_out = coord1;
 681    *weight_out = weight;
 682 }
 683
 684
 685 /**
 686  * Build LLVM code for texture wrap mode for nearest filtering.
 687  * \param coord  the incoming texcoord (nominally in [0,1])
 688  * \param length  the texture size along one dimension, as int
 689  * \param is_pot  if TRUE, length is a power of two
 690  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 691  */
 692 static LLVMValueRef
 693 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 694                              LLVMValueRef coord,
 695                              LLVMValueRef length,
 696                              boolean is_pot,
 697                              unsigned wrap_mode)
 698 {
 699    struct lp_build_context *coord_bld = &bld->coord_bld;
 700    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 701    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 702    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
 703    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
 704    LLVMValueRef icoord;
 705
 706    switch(wrap_mode) {
 707    case PIPE_TEX_WRAP_REPEAT:
 708       coord = lp_build_mul(coord_bld, coord, length_f);
 709       icoord = lp_build_ifloor(coord_bld, coord);
 710       if (is_pot)
 711          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
 712       else
 713          /* Signed remainder won't give the right results for negative
 714           * dividends but unsigned remainder does.*/
 715          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
 716       break;
 717
 718    case PIPE_TEX_WRAP_CLAMP:
 719    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 720       if (bld->static_state->normalized_coords) {
 721          /* scale coord to length */
 722          coord = lp_build_mul(coord_bld, coord, length_f);
 723       }
 724
 725       /* floor */
 726       icoord = lp_build_ifloor(coord_bld, coord);
 727
 728       /* clamp to [0, length - 1]. */
 729       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
 730                               length_minus_one);
 731       break;
 732
 733    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 734       /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
 735       {
 736          LLVMValueRef min, max;
 737
 738          if (bld->static_state->normalized_coords) {
 739             /* scale coord to length */
 740             coord = lp_build_mul(coord_bld, coord, length_f);
 741          }
 742
 743          icoord = lp_build_ifloor(coord_bld, coord);
 744
 745          /* clamp to [-1, length] */
 746          min = lp_build_negate(int_coord_bld, int_coord_bld->one);
 747          max = length;
 748          icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
 749       }
 750       break;
 751
 752    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 753       /* compute mirror function */
 754       coord = lp_build_coord_mirror(bld, coord);
 755
 756       /* scale coord to length */
 757       assert(bld->static_state->normalized_coords);
 758       coord = lp_build_mul(coord_bld, coord, length_f);
 759
 760       icoord = lp_build_ifloor(coord_bld, coord);
 761
 762       /* clamp to [0, length - 1] */
 763       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 764       break;
 765
 766    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 767    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 768       coord = lp_build_abs(coord_bld, coord);
 769
 770       if (bld->static_state->normalized_coords) {
 771          /* scale coord to length */
 772          coord = lp_build_mul(coord_bld, coord, length_f);
 773       }
 774
 775       icoord = lp_build_ifloor(coord_bld, coord);
 776
 777       /* clamp to [0, length - 1] */
 778       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
 779       break;
 780
 781    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 782       coord = lp_build_abs(coord_bld, coord);
 783
 784       if (bld->static_state->normalized_coords) {
 785          /* scale coord to length */
 786          coord = lp_build_mul(coord_bld, coord, length_f);
 787       }
 788
 789       icoord = lp_build_ifloor(coord_bld, coord);
 790
 791       /* clamp to [0, length] */
 792       icoord = lp_build_min(int_coord_bld, icoord, length);
 793       break;
 794
 795    default:
 796       assert(0);
 797       icoord = NULL;
 798    }
 799
 800    return icoord;
 801 }
 802
 803
 804 /**
 805  * Codegen equivalent for u_minify().
 806  * Return max(1, base_size >> level);
 807  */
 808 static LLVMValueRef
 809 lp_build_minify(struct lp_build_sample_context *bld,
 810                 LLVMValueRef base_size,
 811                 LLVMValueRef level)
 812 {
 813    LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
 814    size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
 815    return size;
 816 }
 817
 818
 819 /**
 820  * Generate code to compute texture level of detail (lambda).
 821  * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 822  * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 823  * \param lod_bias  optional float vector with the shader lod bias
 824  * \param explicit_lod  optional float vector with the explicit lod
 825  * \param width  scalar int texture width
 826  * \param height  scalar int texture height
 827  * \param depth  scalar int texture depth
 828  *
 829  * XXX: The resulting lod is scalar, so ignore all but the first element of
 830  * derivatives, lod_bias, etc that are passed by the shader.
 831  */
 832 static LLVMValueRef
 833 lp_build_lod_selector(struct lp_build_sample_context *bld,
 834                       const LLVMValueRef ddx[4],
 835                       const LLVMValueRef ddy[4],
 836                       LLVMValueRef lod_bias, /* optional */
 837                       LLVMValueRef explicit_lod, /* optional */
 838                       LLVMValueRef width,
 839                       LLVMValueRef height,
 840                       LLVMValueRef depth)
 841
 842 {
 843    if (bld->static_state->min_lod == bld->static_state->max_lod) {
 844       /* User is forcing sampling from a particular mipmap level.
 845        * This is hit during mipmap generation.
 846        */
 847       return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
 848    }
 849    else {
 850       struct lp_build_context *float_bld = &bld->float_bld;
 851       LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
 852                                                     bld->static_state->lod_bias);
 853       LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
 854                                            bld->static_state->min_lod);
 855       LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
 856                                            bld->static_state->max_lod);
 857       LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 858       LLVMValueRef lod;
 859
 860       if (explicit_lod) {
 861          lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
 862                                        index0, "");
 863       }
 864       else {
 865          const int dims = texture_dims(bld->static_state->target);
 866          LLVMValueRef dsdx, dsdy;
 867          LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
 868          LLVMValueRef rho;
 869
 870          dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
 871          dsdx = lp_build_abs(float_bld, dsdx);
 872          dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
 873          dsdy = lp_build_abs(float_bld, dsdy);
 874          if (dims > 1) {
 875             dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
 876             dtdx = lp_build_abs(float_bld, dtdx);
 877             dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
 878             dtdy = lp_build_abs(float_bld, dtdy);
 879             if (dims > 2) {
 880                drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
 881                drdx = lp_build_abs(float_bld, drdx);
 882                drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
 883                drdy = lp_build_abs(float_bld, drdy);
 884             }
 885          }
 886
 887          /* Compute rho = max of all partial derivatives scaled by texture size.
 888           * XXX this could be vectorized somewhat
 889           */
 890          rho = LLVMBuildFMul(bld->builder,
 891                             lp_build_max(float_bld, dsdx, dsdy),
 892                             lp_build_int_to_float(float_bld, width), "");
 893          if (dims > 1) {
 894             LLVMValueRef max;
 895             max = LLVMBuildFMul(bld->builder,
 896                                lp_build_max(float_bld, dtdx, dtdy),
 897                                lp_build_int_to_float(float_bld, height), "");
 898             rho = lp_build_max(float_bld, rho, max);
 899             if (dims > 2) {
 900                max = LLVMBuildFMul(bld->builder,
 901                                   lp_build_max(float_bld, drdx, drdy),
 902                                   lp_build_int_to_float(float_bld, depth), "");
 903                rho = lp_build_max(float_bld, rho, max);
 904             }
 905          }
 906
 907          /* compute lod = log2(rho) */
 908          lod = lp_build_log2(float_bld, rho);
 909
 910          /* add shader lod bias */
 911          if (lod_bias) {
 912             lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
 913                                                index0, "");
 914             lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
 915          }
 916       }
 917
 918       /* add sampler lod bias */
 919       lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
 920
 921       /* clamp lod */
 922       lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 923
 924       return lod;
 925    }
 926 }
 927
 928
 929 /**
 930  * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
 931  * mipmap level index.
 932  * Note: this is all scalar code.
 933  * \param lod  scalar float texture level of detail
 934  * \param level_out  returns integer
 935  */
 936 static void
 937 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
 938                            unsigned unit,
 939                            LLVMValueRef lod,
 940                            LLVMValueRef *level_out)
 941 {
 942    struct lp_build_context *float_bld = &bld->float_bld;
 943    struct lp_build_context *int_bld = &bld->int_bld;
 944    LLVMValueRef last_level, level;
 945
 946    LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
 947
 948    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 949                                                bld->builder, unit);
 950
 951    /* convert float lod to integer */
 952    level = lp_build_iround(float_bld, lod);
 953
 954    /* clamp level to legal range of levels */
 955    *level_out = lp_build_clamp(int_bld, level, zero, last_level);
 956 }
 957
 958
 959 /**
 960  * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
 961  * two (adjacent) mipmap level indexes.  Later, we'll sample from those
 962  * two mipmap levels and interpolate between them.
 963  */
 964 static void
 965 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
 966                            unsigned unit,
 967                            LLVMValueRef lod,
 968                            LLVMValueRef *level0_out,
 969                            LLVMValueRef *level1_out,
 970                            LLVMValueRef *weight_out)
 971 {
 972    struct lp_build_context *float_bld = &bld->float_bld;
 973    struct lp_build_context *int_bld = &bld->int_bld;
 974    LLVMValueRef last_level, level;
 975
 976    last_level = bld->dynamic_state->last_level(bld->dynamic_state,
 977                                                bld->builder, unit);
 978
 979    /* convert float lod to integer */
 980    level = lp_build_ifloor(float_bld, lod);
 981
 982    /* compute level 0 and clamp to legal range of levels */
 983    *level0_out = lp_build_clamp(int_bld, level,
 984                                 int_bld->zero,
 985                                 last_level);
 986    /* compute level 1 and clamp to legal range of levels */
 987    level = lp_build_add(int_bld, level, int_bld->one);
 988    *level1_out = lp_build_clamp(int_bld, level,
 989                                 int_bld->zero,
 990                                 last_level);
 991
 992    *weight_out = lp_build_fract(float_bld, lod);
 993 }
 994
 995
 996 /**
 997  * Generate code to sample a mipmap level with nearest filtering.
 998  * If sampling a cube texture, r = cube face in [0,5].
 999  */
1000 static void
1001 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1002                               LLVMValueRef width_vec,
1003                               LLVMValueRef height_vec,
1004                               LLVMValueRef depth_vec,
1005                               LLVMValueRef row_stride_vec,
1006                               LLVMValueRef img_stride_vec,
1007                               LLVMValueRef data_ptr,
1008                               LLVMValueRef s,
1009                               LLVMValueRef t,
1010                               LLVMValueRef r,
1011                               LLVMValueRef colors_out[4])
1012 {
1013    const int dims = texture_dims(bld->static_state->target);
1014    LLVMValueRef x, y, z;
1015
1016    /*
1017     * Compute integer texcoords.
1018     */
1019    x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1020                                     bld->static_state->pot_width,
1021                                     bld->static_state->wrap_s);
1022    lp_build_name(x, "tex.x.wrapped");
1023
1024    if (dims >= 2) {
1025       y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1026                                        bld->static_state->pot_height,
1027                                        bld->static_state->wrap_t);
1028       lp_build_name(y, "tex.y.wrapped");
1029
1030       if (dims == 3) {
1031          z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1032                                           bld->static_state->pot_height,
1033                                           bld->static_state->wrap_r);
1034          lp_build_name(z, "tex.z.wrapped");
1035       }
1036       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1037          z = r;
1038       }
1039       else {
1040          z = NULL;
1041       }
1042    }
1043    else {
1044       y = z = NULL;
1045    }
1046
1047    /*
1048     * Get texture colors.
1049     */
1050    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1051                              x, y, z,
1052                              row_stride_vec, img_stride_vec,
1053                              data_ptr, colors_out);
1054 }
1055
1056
1057 /**
1058  * Generate code to sample a mipmap level with linear filtering.
1059  * If sampling a cube texture, r = cube face in [0,5].
1060  */
1061 static void
1062 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1063                              LLVMValueRef width_vec,
1064                              LLVMValueRef height_vec,
1065                              LLVMValueRef depth_vec,
1066                              LLVMValueRef row_stride_vec,
1067                              LLVMValueRef img_stride_vec,
1068                              LLVMValueRef data_ptr,
1069                              LLVMValueRef s,
1070                              LLVMValueRef t,
1071                              LLVMValueRef r,
1072                              LLVMValueRef colors_out[4])
1073 {
1074    const int dims = texture_dims(bld->static_state->target);
1075    LLVMValueRef x0, y0, z0, x1, y1, z1;
1076    LLVMValueRef s_fpart, t_fpart, r_fpart;
1077    LLVMValueRef neighbors[2][2][4];
1078    int chan;
1079
1080    /*
1081     * Compute integer texcoords.
1082     */
1083    lp_build_sample_wrap_linear(bld, s, width_vec,
1084                                bld->static_state->pot_width,
1085                                bld->static_state->wrap_s,
1086                                &x0, &x1, &s_fpart);
1087    lp_build_name(x0, "tex.x0.wrapped");
1088    lp_build_name(x1, "tex.x1.wrapped");
1089
1090    if (dims >= 2) {
1091       lp_build_sample_wrap_linear(bld, t, height_vec,
1092                                   bld->static_state->pot_height,
1093                                   bld->static_state->wrap_t,
1094                                   &y0, &y1, &t_fpart);
1095       lp_build_name(y0, "tex.y0.wrapped");
1096       lp_build_name(y1, "tex.y1.wrapped");
1097
1098       if (dims == 3) {
1099          lp_build_sample_wrap_linear(bld, r, depth_vec,
1100                                      bld->static_state->pot_depth,
1101                                      bld->static_state->wrap_r,
1102                                      &z0, &z1, &r_fpart);
1103          lp_build_name(z0, "tex.z0.wrapped");
1104          lp_build_name(z1, "tex.z1.wrapped");
1105       }
1106       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1107          z0 = z1 = r;  /* cube face */
1108          r_fpart = NULL;
1109       }
1110       else {
1111          z0 = z1 = NULL;
1112          r_fpart = NULL;
1113       }
1114    }
1115    else {
1116       y0 = y1 = t_fpart = NULL;
1117       z0 = z1 = r_fpart = NULL;
1118    }
1119
1120    /*
1121     * Get texture colors.
1122     */
1123    /* get x0/x1 texels */
1124    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1125                              x0, y0, z0,
1126                              row_stride_vec, img_stride_vec,
1127                              data_ptr, neighbors[0][0]);
1128    lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1129                              x1, y0, z0,
1130                              row_stride_vec, img_stride_vec,
1131                              data_ptr, neighbors[0][1]);
1132
1133    if (dims == 1) {
1134       /* Interpolate two samples from 1D image to produce one color */
1135       for (chan = 0; chan < 4; chan++) {
1136          colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1137                                           neighbors[0][0][chan],
1138                                           neighbors[0][1][chan]);
1139       }
1140    }
1141    else {
1142       /* 2D/3D texture */
1143       LLVMValueRef colors0[4];
1144
1145       /* get x0/x1 texels at y1 */
1146       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1147                                 x0, y1, z0,
1148                                 row_stride_vec, img_stride_vec,
1149                                 data_ptr, neighbors[1][0]);
1150       lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1151                                 x1, y1, z0,
1152                                 row_stride_vec, img_stride_vec,
1153                                 data_ptr, neighbors[1][1]);
1154
1155       /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1156       for (chan = 0; chan < 4; chan++) {
1157          colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1158                                           s_fpart, t_fpart,
1159                                           neighbors[0][0][chan],
1160                                           neighbors[0][1][chan],
1161                                           neighbors[1][0][chan],
1162                                           neighbors[1][1][chan]);
1163       }
1164
1165       if (dims == 3) {
1166          LLVMValueRef neighbors1[2][2][4];
1167          LLVMValueRef colors1[4];
1168
1169          /* get x0/x1/y0/y1 texels at z1 */
1170          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1171                                    x0, y0, z1,
1172                                    row_stride_vec, img_stride_vec,
1173                                    data_ptr, neighbors1[0][0]);
1174          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1175                                    x1, y0, z1,
1176                                    row_stride_vec, img_stride_vec,
1177                                    data_ptr, neighbors1[0][1]);
1178          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1179                                    x0, y1, z1,
1180                                    row_stride_vec, img_stride_vec,
1181                                    data_ptr, neighbors1[1][0]);
1182          lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1183                                    x1, y1, z1,
1184                                    row_stride_vec, img_stride_vec,
1185                                    data_ptr, neighbors1[1][1]);
1186
1187          /* Bilinear interpolate the four samples from the second Z slice */
1188          for (chan = 0; chan < 4; chan++) {
1189             colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1190                                              s_fpart, t_fpart,
1191                                              neighbors1[0][0][chan],
1192                                              neighbors1[0][1][chan],
1193                                              neighbors1[1][0][chan],
1194                                              neighbors1[1][1][chan]);
1195          }
1196
1197          /* Linearly interpolate the two samples from the two 3D slices */
1198          for (chan = 0; chan < 4; chan++) {
1199             colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1200                                              r_fpart,
1201                                              colors0[chan], colors1[chan]);
1202          }
1203       }
1204       else {
1205          /* 2D tex */
1206          for (chan = 0; chan < 4; chan++) {
1207             colors_out[chan] = colors0[chan];
1208          }
1209       }
1210    }
1211 }
1212
1213
1214 /** Helper used by lp_build_cube_lookup() */
1215 static LLVMValueRef
1216 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1217 {
1218    /* ima = -0.5 / abs(coord); */
1219    LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1220    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1221    LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
1222    return ima;
1223 }
1224
1225
1226 /**
1227  * Helper used by lp_build_cube_lookup()
1228  * \param sign  scalar +1 or -1
1229  * \param coord  float vector
1230  * \param ima  float vector
1231  */
1232 static LLVMValueRef
1233 lp_build_cube_coord(struct lp_build_context *coord_bld,
1234                     LLVMValueRef sign, int negate_coord,
1235                     LLVMValueRef coord, LLVMValueRef ima)
1236 {
1237    /* return negate(coord) * ima * sign + 0.5; */
1238    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1239    LLVMValueRef res;
1240
1241    assert(negate_coord == +1 || negate_coord == -1);
1242
1243    if (negate_coord == -1) {
1244       coord = lp_build_negate(coord_bld, coord);
1245    }
1246
1247    res = lp_build_mul(coord_bld, coord, ima);
1248    if (sign) {
1249       sign = lp_build_broadcast_scalar(coord_bld, sign);
1250       res = lp_build_mul(coord_bld, res, sign);
1251    }
1252    res = lp_build_add(coord_bld, res, half);
1253
1254    return res;
1255 }
1256
1257
1258 /** Helper used by lp_build_cube_lookup()
1259  * Return (major_coord >= 0) ? pos_face : neg_face;
1260  */
1261 static LLVMValueRef
1262 lp_build_cube_face(struct lp_build_sample_context *bld,
1263                    LLVMValueRef major_coord,
1264                    unsigned pos_face, unsigned neg_face)
1265 {
1266    LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1267                                     major_coord,
1268                                     bld->float_bld.zero, "");
1269    LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1270    LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1271    LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1272    return res;
1273 }
1274
1275
1276
1277 /**
1278  * Generate code to do cube face selection and compute per-face texcoords.
1279  */
1280 static void
1281 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1282                      LLVMValueRef s,
1283                      LLVMValueRef t,
1284                      LLVMValueRef r,
1285                      LLVMValueRef *face,
1286                      LLVMValueRef *face_s,
1287                      LLVMValueRef *face_t)
1288 {
1289    struct lp_build_context *float_bld = &bld->float_bld;
1290    struct lp_build_context *coord_bld = &bld->coord_bld;
1291    LLVMValueRef rx, ry, rz;
1292    LLVMValueRef arx, ary, arz;
1293    LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1294    LLVMValueRef arx_ge_ary, arx_ge_arz;
1295    LLVMValueRef ary_ge_arx, ary_ge_arz;
1296    LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1297    LLVMValueRef rx_pos, ry_pos, rz_pos;
1298
1299    assert(bld->coord_bld.type.length == 4);
1300
1301    /*
1302     * Use the average of the four pixel's texcoords to choose the face.
1303     */
1304    rx = lp_build_mul(float_bld, c25,
1305                      lp_build_sum_vector(&bld->coord_bld, s));
1306    ry = lp_build_mul(float_bld, c25,
1307                      lp_build_sum_vector(&bld->coord_bld, t));
1308    rz = lp_build_mul(float_bld, c25,
1309                      lp_build_sum_vector(&bld->coord_bld, r));
1310
1311    arx = lp_build_abs(float_bld, rx);
1312    ary = lp_build_abs(float_bld, ry);
1313    arz = lp_build_abs(float_bld, rz);
1314
1315    /*
1316     * Compare sign/magnitude of rx,ry,rz to determine face
1317     */
1318    arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1319    arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1320    ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1321    ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1322
1323    arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1324    ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1325
1326    rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1327    ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1328    rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1329
1330    {
1331       struct lp_build_flow_context *flow_ctx;
1332       struct lp_build_if_state if_ctx;
1333
1334       flow_ctx = lp_build_flow_create(bld->builder);
1335       lp_build_flow_scope_begin(flow_ctx);
1336
1337       *face_s = bld->coord_bld.undef;
1338       *face_t = bld->coord_bld.undef;
1339       *face = bld->int_bld.undef;
1340
1341       lp_build_name(*face_s, "face_s");
1342       lp_build_name(*face_t, "face_t");
1343       lp_build_name(*face, "face");
1344
1345       lp_build_flow_scope_declare(flow_ctx, face_s);
1346       lp_build_flow_scope_declare(flow_ctx, face_t);
1347       lp_build_flow_scope_declare(flow_ctx, face);
1348
1349       lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1350       {
1351          /* +/- X face */
1352          LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1353          LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1354          *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1355          *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1356          *face = lp_build_cube_face(bld, rx,
1357                                     PIPE_TEX_FACE_POS_X,
1358                                     PIPE_TEX_FACE_NEG_X);
1359       }
1360       lp_build_else(&if_ctx);
1361       {
1362          struct lp_build_flow_context *flow_ctx2;
1363          struct lp_build_if_state if_ctx2;
1364
1365          LLVMValueRef face_s2 = bld->coord_bld.undef;
1366          LLVMValueRef face_t2 = bld->coord_bld.undef;
1367          LLVMValueRef face2 = bld->int_bld.undef;
1368
1369          flow_ctx2 = lp_build_flow_create(bld->builder);
1370          lp_build_flow_scope_begin(flow_ctx2);
1371          lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1372          lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1373          lp_build_flow_scope_declare(flow_ctx2, &face2);
1374
1375          ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1376
1377          lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1378          {
1379             /* +/- Y face */
1380             LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1381             LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1382             face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1383             face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1384             face2 = lp_build_cube_face(bld, ry,
1385                                        PIPE_TEX_FACE_POS_Y,
1386                                        PIPE_TEX_FACE_NEG_Y);
1387          }
1388          lp_build_else(&if_ctx2);
1389          {
1390             /* +/- Z face */
1391             LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1392             LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1393             face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1394             face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1395             face2 = lp_build_cube_face(bld, rz,
1396                                        PIPE_TEX_FACE_POS_Z,
1397                                        PIPE_TEX_FACE_NEG_Z);
1398          }
1399          lp_build_endif(&if_ctx2);
1400          lp_build_flow_scope_end(flow_ctx2);
1401          lp_build_flow_destroy(flow_ctx2);
1402          *face_s = face_s2;
1403          *face_t = face_t2;
1404          *face = face2;
1405       }
1406
1407       lp_build_endif(&if_ctx);
1408       lp_build_flow_scope_end(flow_ctx);
1409       lp_build_flow_destroy(flow_ctx);
1410    }
1411 }
1412
1413
1414
1415 /**
1416  * Sample the texture/mipmap using given image filter and mip filter.
1417  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1418  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1419  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1420  */
1421 static void
1422 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1423                        unsigned img_filter,
1424                        unsigned mip_filter,
1425                        LLVMValueRef s,
1426                        LLVMValueRef t,
1427                        LLVMValueRef r,
1428                        LLVMValueRef lod_fpart,
1429                        LLVMValueRef width0_vec,
1430                        LLVMValueRef width1_vec,
1431                        LLVMValueRef height0_vec,
1432                        LLVMValueRef height1_vec,
1433                        LLVMValueRef depth0_vec,
1434                        LLVMValueRef depth1_vec,
1435                        LLVMValueRef row_stride0_vec,
1436                        LLVMValueRef row_stride1_vec,
1437                        LLVMValueRef img_stride0_vec,
1438                        LLVMValueRef img_stride1_vec,
1439                        LLVMValueRef data_ptr0,
1440                        LLVMValueRef data_ptr1,
1441                        LLVMValueRef *colors_out)
1442 {
1443    LLVMValueRef colors0[4], colors1[4];
1444    int chan;
1445
1446    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1447       /* sample the first mipmap level */
1448       lp_build_sample_image_nearest(bld,
1449                                     width0_vec, height0_vec, depth0_vec,
1450                                     row_stride0_vec, img_stride0_vec,
1451                                     data_ptr0, s, t, r, colors0);
1452
1453       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1454          /* sample the second mipmap level */
1455          lp_build_sample_image_nearest(bld,
1456                                        width1_vec, height1_vec, depth1_vec,
1457                                        row_stride1_vec, img_stride1_vec,
1458                                        data_ptr1, s, t, r, colors1);
1459       }
1460    }
1461    else {
1462       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1463
1464       /* sample the first mipmap level */
1465       lp_build_sample_image_linear(bld,
1466                                    width0_vec, height0_vec, depth0_vec,
1467                                    row_stride0_vec, img_stride0_vec,
1468                                    data_ptr0, s, t, r, colors0);
1469
1470       if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1471          /* sample the second mipmap level */
1472          lp_build_sample_image_linear(bld,
1473                                       width1_vec, height1_vec, depth1_vec,
1474                                       row_stride1_vec, img_stride1_vec,
1475                                       data_ptr1, s, t, r, colors1);
1476       }
1477    }
1478
1479    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1480       /* interpolate samples from the two mipmap levels */
1481       for (chan = 0; chan < 4; chan++) {
1482          colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1483                                           colors0[chan], colors1[chan]);
1484       }
1485    }
1486    else {
1487       /* use first/only level's colors */
1488       for (chan = 0; chan < 4; chan++) {
1489          colors_out[chan] = colors0[chan];
1490       }
1491    }
1492 }
1493
1494
1495
1496 /**
1497  * General texture sampling codegen.
1498  * This function handles texture sampling for all texture targets (1D,
1499  * 2D, 3D, cube) and all filtering modes.
1500  */
1501 static void
1502 lp_build_sample_general(struct lp_build_sample_context *bld,
1503                         unsigned unit,
1504                         LLVMValueRef s,
1505                         LLVMValueRef t,
1506                         LLVMValueRef r,
1507                         const LLVMValueRef *ddx,
1508                         const LLVMValueRef *ddy,
1509                         LLVMValueRef lod_bias, /* optional */
1510                         LLVMValueRef explicit_lod, /* optional */
1511                         LLVMValueRef width,
1512                         LLVMValueRef height,
1513                         LLVMValueRef depth,
1514                         LLVMValueRef width_vec,
1515                         LLVMValueRef height_vec,
1516                         LLVMValueRef depth_vec,
1517                         LLVMValueRef row_stride_array,
1518                         LLVMValueRef img_stride_array,
1519                         LLVMValueRef data_array,
1520                         LLVMValueRef *colors_out)
1521 {
1522    struct lp_build_context *float_bld = &bld->float_bld;
1523    const unsigned mip_filter = bld->static_state->min_mip_filter;
1524    const unsigned min_filter = bld->static_state->min_img_filter;
1525    const unsigned mag_filter = bld->static_state->mag_img_filter;
1526    const int dims = texture_dims(bld->static_state->target);
1527    LLVMValueRef lod = NULL, lod_fpart = NULL;
1528    LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1529    LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1530    LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1531    LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1532    LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1533    LLVMValueRef data_ptr0, data_ptr1 = NULL;
1534    LLVMValueRef face_ddx[4], face_ddy[4];
1535
1536    /*
1537    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1538           mip_filter, min_filter, mag_filter);
1539    */
1540
1541    /*
1542     * Choose cube face, recompute texcoords and derivatives for the chosen face.
1543     */
1544    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1545       LLVMValueRef face, face_s, face_t;
1546       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1547       s = face_s; /* vec */
1548       t = face_t; /* vec */
1549       /* use 'r' to indicate cube face */
1550       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1551
1552       /* recompute ddx, ddy using the new (s,t) face texcoords */
1553       face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
1554       face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
1555       face_ddx[2] = NULL;
1556       face_ddx[3] = NULL;
1557       face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
1558       face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
1559       face_ddy[2] = NULL;
1560       face_ddy[3] = NULL;
1561       ddx = face_ddx;
1562       ddy = face_ddy;
1563    }
1564
1565    /*
1566     * Compute the level of detail (float).
1567     */
1568    if (min_filter != mag_filter ||
1569        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1570       /* Need to compute lod either to choose mipmap levels or to
1571        * distinguish between minification/magnification with one mipmap level.
1572        */
1573       lod = lp_build_lod_selector(bld, ddx, ddy,
1574                                   lod_bias, explicit_lod,
1575                                   width, height, depth);
1576    }
1577
1578    /*
1579     * Compute integer mipmap level(s) to fetch texels from.
1580     */
1581    if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1582       /* always use mip level 0 */
1583       if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1584          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1585           * We should be able to set ilevel0 = const(0) but that causes
1586           * bad x86 code to be emitted.
1587           */
1588          lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
1589          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1590       }
1591       else {
1592          ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1593       }
1594    }
1595    else {
1596       assert(lod);
1597       if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1598          lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1599       }
1600       else {
1601          assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1602          lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1603                                     &lod_fpart);
1604          lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1605       }
1606    }
1607
1608    /*
1609     * Convert scalar integer mipmap levels into vectors.
1610     */
1611    ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1612    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1613       ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1614
1615    /*
1616     * Compute width, height at mipmap level 'ilevel0'
1617     */
1618    width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1619    if (dims >= 2) {
1620       height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1621       row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1622                                                       ilevel0);
1623       if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1624          img_stride0_vec = lp_build_get_level_stride_vec(bld,
1625                                                          img_stride_array,
1626                                                          ilevel0);
1627          if (dims == 3) {
1628             depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1629          }
1630       }
1631    }
1632    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1633       /* compute width, height, depth for second mipmap level at 'ilevel1' */
1634       width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1635       if (dims >= 2) {
1636          height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1637          row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1638                                                          ilevel1);
1639          if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1640             img_stride1_vec = lp_build_get_level_stride_vec(bld,
1641                                                             img_stride_array,
1642                                                             ilevel1);
1643             if (dims ==3) {
1644                depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1645             }
1646          }
1647       }
1648    }
1649
1650    /*
1651     * Get pointer(s) to image data for mipmap level(s).
1652     */
1653    data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1654    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1655       data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1656    }
1657
1658    /*
1659     * Get/interpolate texture colors.
1660     */
1661    if (min_filter == mag_filter) {
1662       /* no need to distinquish between minification and magnification */
1663       lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1664                              width0_vec, width1_vec,
1665                              height0_vec, height1_vec,
1666                              depth0_vec, depth1_vec,
1667                              row_stride0_vec, row_stride1_vec,
1668                              img_stride0_vec, img_stride1_vec,
1669                              data_ptr0, data_ptr1,
1670                              colors_out);
1671    }
1672    else {
1673       /* Emit conditional to choose min image filter or mag image filter
1674        * depending on the lod being >0 or <= 0, respectively.
1675        */
1676       struct lp_build_flow_context *flow_ctx;
1677       struct lp_build_if_state if_ctx;
1678       LLVMValueRef minify;
1679
1680       flow_ctx = lp_build_flow_create(bld->builder);
1681       lp_build_flow_scope_begin(flow_ctx);
1682
1683       lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1684       lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1685       lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1686       lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1687
1688       /* minify = lod > 0.0 */
1689       minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1690                              lod, float_bld->zero, "");
1691
1692       lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1693       {
1694          /* Use the minification filter */
1695          lp_build_sample_mipmap(bld, min_filter, mip_filter,
1696                                 s, t, r, lod_fpart,
1697                                 width0_vec, width1_vec,
1698                                 height0_vec, height1_vec,
1699                                 depth0_vec, depth1_vec,
1700                                 row_stride0_vec, row_stride1_vec,
1701                                 img_stride0_vec, img_stride1_vec,
1702                                 data_ptr0, data_ptr1,
1703                                 colors_out);
1704       }
1705       lp_build_else(&if_ctx);
1706       {
1707          /* Use the magnification filter */
1708          lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1709                                 s, t, r, lod_fpart,
1710                                 width0_vec, width1_vec,
1711                                 height0_vec, height1_vec,
1712                                 depth0_vec, depth1_vec,
1713                                 row_stride0_vec, row_stride1_vec,
1714                                 img_stride0_vec, img_stride1_vec,
1715                                 data_ptr0, data_ptr1,
1716                                 colors_out);
1717       }
1718       lp_build_endif(&if_ctx);
1719
1720       lp_build_flow_scope_end(flow_ctx);
1721       lp_build_flow_destroy(flow_ctx);
1722    }
1723 }
1724
1725
1726
1727 static void
1728 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1729                               LLVMValueRef s,
1730                               LLVMValueRef t,
1731                               LLVMValueRef width,
1732                               LLVMValueRef height,
1733                               LLVMValueRef stride_array,
1734                               LLVMValueRef data_array,
1735                               LLVMValueRef texel_out[4])
1736 {
1737    LLVMBuilderRef builder = bld->builder;
1738    struct lp_build_context i32, h16, u8n;
1739    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1740    LLVMValueRef i32_c8, i32_c128, i32_c255;
1741    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1742    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1743    LLVMValueRef x0, x1;
1744    LLVMValueRef y0, y1;
1745    LLVMValueRef neighbors[2][2];
1746    LLVMValueRef neighbors_lo[2][2];
1747    LLVMValueRef neighbors_hi[2][2];
1748    LLVMValueRef packed, packed_lo, packed_hi;
1749    LLVMValueRef unswizzled[4];
1750    LLVMValueRef stride;
1751
1752    assert(bld->static_state->target == PIPE_TEXTURE_2D);
1753    assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1754    assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1755    assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1756
1757    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1758    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1759    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1760
1761    i32_vec_type = lp_build_vec_type(i32.type);
1762    h16_vec_type = lp_build_vec_type(h16.type);
1763    u8n_vec_type = lp_build_vec_type(u8n.type);
1764
1765    if (bld->static_state->normalized_coords) {
1766       LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1767       LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1768       LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1769       s = lp_build_mul(&bld->coord_bld, s, fp_width);
1770       t = lp_build_mul(&bld->coord_bld, t, fp_height);
1771    }
1772
1773    /* scale coords by 256 (8 fractional bits) */
1774    s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1775    t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1776
1777    /* convert float to int */
1778    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1779    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1780
1781    /* subtract 0.5 (add -128) */
1782    i32_c128 = lp_build_const_int_vec(i32.type, -128);
1783    s = LLVMBuildAdd(builder, s, i32_c128, "");
1784    t = LLVMBuildAdd(builder, t, i32_c128, "");
1785
1786    /* compute floor (shift right 8) */
1787    i32_c8 = lp_build_const_int_vec(i32.type, 8);
1788    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1789    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1790
1791    /* compute fractional part (AND with 0xff) */
1792    i32_c255 = lp_build_const_int_vec(i32.type, 255);
1793    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1794    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1795
1796    x0 = s_ipart;
1797    y0 = t_ipart;
1798
1799    x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1800    y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1801
1802    x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1803                                  bld->static_state->wrap_s);
1804    y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1805                                  bld->static_state->wrap_t);
1806
1807    x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1808                                  bld->static_state->wrap_s);
1809    y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1810                                  bld->static_state->wrap_t);
1811
1812    /*
1813     * Transform 4 x i32 in
1814     *
1815     *   s_fpart = {s0, s1, s2, s3}
1816     *
1817     * into 8 x i16
1818     *
1819     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1820     *
1821     * into two 8 x i16
1822     *
1823     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1824     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1825     *
1826     * and likewise for t_fpart. There is no risk of loosing precision here
1827     * since the fractional parts only use the lower 8bits.
1828     */
1829
1830    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1831    t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1832
1833    {
1834       LLVMTypeRef elem_type = LLVMInt32Type();
1835       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1836       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1837       LLVMValueRef shuffle_lo;
1838       LLVMValueRef shuffle_hi;
1839       unsigned i, j;
1840
1841       for(j = 0; j < h16.type.length; j += 4) {
1842 #ifdef PIPE_ARCH_LITTLE_ENDIAN
1843          unsigned subindex = 0;
1844 #else
1845          unsigned subindex = 1;
1846 #endif
1847          LLVMValueRef index;
1848
1849          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1850          for(i = 0; i < 4; ++i)
1851             shuffles_lo[j + i] = index;
1852
1853          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1854          for(i = 0; i < 4; ++i)
1855             shuffles_hi[j + i] = index;
1856       }
1857
1858       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1859       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1860
1861       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1862       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1863       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1864       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1865    }
1866
1867    stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1868
1869    /*
1870     * Fetch the pixels as 4 x 32bit (rgba order might differ):
1871     *
1872     *   rgba0 rgba1 rgba2 rgba3
1873     *
1874     * bit cast them into 16 x u8
1875     *
1876     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1877     *
1878     * unpack them into two 8 x i16:
1879     *
1880     *   r0 g0 b0 a0 r1 g1 b1 a1
1881     *   r2 g2 b2 a2 r3 g3 b3 a3
1882     *
1883     * The higher 8 bits of the resulting elements will be zero.
1884     */
1885
1886    neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1887    neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1888    neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1889    neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1890
1891    neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1892    neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1893    neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1894    neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1895
1896    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1897    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1898    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1899    lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1900
1901    /*
1902     * Linear interpolate with 8.8 fixed point.
1903     */
1904
1905    packed_lo = lp_build_lerp_2d(&h16,
1906                                 s_fpart_lo, t_fpart_lo,
1907                                 neighbors_lo[0][0],
1908                                 neighbors_lo[0][1],
1909                                 neighbors_lo[1][0],
1910                                 neighbors_lo[1][1]);
1911
1912    packed_hi = lp_build_lerp_2d(&h16,
1913                                 s_fpart_hi, t_fpart_hi,
1914                                 neighbors_hi[0][0],
1915                                 neighbors_hi[0][1],
1916                                 neighbors_hi[1][0],
1917                                 neighbors_hi[1][1]);
1918
1919    packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1920
1921    /*
1922     * Convert to SoA and swizzle.
1923     */
1924
1925    lp_build_rgba8_to_f32_soa(bld->builder,
1926                              bld->texel_type,
1927                              packed, unswizzled);
1928
1929    if (util_format_is_rgba8_variant(bld->format_desc)) {
1930       lp_build_format_swizzle_soa(bld->format_desc,
1931                                   &bld->texel_bld,
1932                                   unswizzled, texel_out);
1933    } else {
1934       texel_out[0] = unswizzled[0];
1935       texel_out[1] = unswizzled[1];
1936       texel_out[2] = unswizzled[2];
1937       texel_out[3] = unswizzled[3];
1938    }
1939
1940    apply_sampler_swizzle(bld, texel_out);
1941 }
1942
1943
1944 static void
1945 lp_build_sample_compare(struct lp_build_sample_context *bld,
1946                         LLVMValueRef p,
1947                         LLVMValueRef texel[4])
1948 {
1949    struct lp_build_context *texel_bld = &bld->texel_bld;
1950    LLVMValueRef res;
1951    unsigned chan;
1952
1953    if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1954       return;
1955
1956    /* TODO: Compare before swizzling, to avoid redundant computations */
1957    res = NULL;
1958    for(chan = 0; chan < 4; ++chan) {
1959       LLVMValueRef cmp;
1960       cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1961       cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1962
1963       if(res)
1964          res = lp_build_add(texel_bld, res, cmp);
1965       else
1966          res = cmp;
1967    }
1968
1969    assert(res);
1970    res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1971
1972    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1973    for(chan = 0; chan < 3; ++chan)
1974       texel[chan] = res;
1975    texel[3] = texel_bld->one;
1976 }
1977
1978
1979 /**
1980  * Just set texels to white instead of actually sampling the texture.
1981  * For debugging.
1982  */
1983 static void
1984 lp_build_sample_nop(struct lp_build_sample_context *bld,
1985                     LLVMValueRef texel_out[4])
1986 {
1987    struct lp_build_context *texel_bld = &bld->texel_bld;
1988    unsigned chan;
1989
1990    for (chan = 0; chan < 4; chan++) {
1991       /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
1992       texel_out[chan] = texel_bld->one;
1993    }
1994 }
1995
1996
1997 /**
1998  * Build texture sampling code.
1999  * 'texel' will return a vector of four LLVMValueRefs corresponding to
2000  * R, G, B, A.
2001  * \param type  vector float type to use for coords, etc.
2002  * \param ddx  partial derivatives of (s,t,r,q) with respect to x
2003  * \param ddy  partial derivatives of (s,t,r,q) with respect to y
2004  */
2005 void
2006 lp_build_sample_soa(LLVMBuilderRef builder,
2007                     const struct lp_sampler_static_state *static_state,
2008                     struct lp_sampler_dynamic_state *dynamic_state,
2009                     struct lp_type type,
2010                     unsigned unit,
2011                     unsigned num_coords,
2012                     const LLVMValueRef *coords,
2013                     const LLVMValueRef ddx[4],
2014                     const LLVMValueRef ddy[4],
2015                     LLVMValueRef lod_bias, /* optional */
2016                     LLVMValueRef explicit_lod, /* optional */
2017                     LLVMValueRef texel_out[4])
2018 {
2019    struct lp_build_sample_context bld;
2020    LLVMValueRef width, width_vec;
2021    LLVMValueRef height, height_vec;
2022    LLVMValueRef depth, depth_vec;
2023    LLVMValueRef row_stride_array, img_stride_array;
2024    LLVMValueRef data_array;
2025    LLVMValueRef s;
2026    LLVMValueRef t;
2027    LLVMValueRef r;
2028
2029    if (0) {
2030       enum pipe_format fmt = static_state->format;
2031       debug_printf("Sample from %s\n", util_format_name(fmt));
2032    }
2033
2034    assert(type.floating);
2035
2036    /* Setup our build context */
2037    memset(&bld, 0, sizeof bld);
2038    bld.builder = builder;
2039    bld.static_state = static_state;
2040    bld.dynamic_state = dynamic_state;
2041    bld.format_desc = util_format_description(static_state->format);
2042
2043    bld.float_type = lp_type_float(32);
2044    bld.int_type = lp_type_int(32);
2045    bld.coord_type = type;
2046    bld.uint_coord_type = lp_uint_type(type);
2047    bld.int_coord_type = lp_int_type(type);
2048    bld.texel_type = type;
2049
2050    lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2051    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2052    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2053    lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2054    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2055    lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2056
2057    /* Get the dynamic state */
2058    width = dynamic_state->width(dynamic_state, builder, unit);
2059    height = dynamic_state->height(dynamic_state, builder, unit);
2060    depth = dynamic_state->depth(dynamic_state, builder, unit);
2061    row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2062    img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2063    data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2064    /* Note that data_array is an array[level] of pointers to texture images */
2065
2066    s = coords[0];
2067    t = coords[1];
2068    r = coords[2];
2069
2070    width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2071    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2072    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2073
2074    if (0) {
2075       /* For debug: no-op texture sampling */
2076       lp_build_sample_nop(&bld, texel_out);
2077    }
2078    else if (util_format_fits_8unorm(bld.format_desc) &&
2079             bld.format_desc->nr_channels > 1 &&
2080             static_state->target == PIPE_TEXTURE_2D &&
2081             static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2082             static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2083             static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2084             is_simple_wrap_mode(static_state->wrap_s) &&
2085             is_simple_wrap_mode(static_state->wrap_t)) {
2086       /* special case */
2087       lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2088                                     row_stride_array, data_array, texel_out);
2089    }
2090    else {
2091       lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2092                               lod_bias, explicit_lod,
2093                               width, height, depth,
2094                               width_vec, height_vec, depth_vec,
2095                               row_stride_array, img_stride_array,
2096                               data_array,
2097                               texel_out);
2098    }
2099
2100    lp_build_sample_compare(&bld, r, texel_out);
2101 }