src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28 /**
  29  * @file
  30  * Texture sampling -- AoS.
  31  *
  32  * @author Jose Fonseca <jfonseca@vmware.com>
  33  * @author Brian Paul <brianp@vmware.com>
  34  */
  35
  36 #include "pipe/p_defines.h"
  37 #include "pipe/p_state.h"
  38 #include "util/u_debug.h"
  39 #include "util/u_dump.h"
  40 #include "util/u_memory.h"
  41 #include "util/u_math.h"
  42 #include "util/u_format.h"
  43 #include "lp_bld_debug.h"
  44 #include "lp_bld_type.h"
  45 #include "lp_bld_const.h"
  46 #include "lp_bld_conv.h"
  47 #include "lp_bld_arit.h"
  48 #include "lp_bld_bitarit.h"
  49 #include "lp_bld_logic.h"
  50 #include "lp_bld_swizzle.h"
  51 #include "lp_bld_pack.h"
  52 #include "lp_bld_flow.h"
  53 #include "lp_bld_gather.h"
  54 #include "lp_bld_format.h"
  55 #include "lp_bld_sample.h"
  56 #include "lp_bld_sample_aos.h"
  57 #include "lp_bld_quad.h"
  58
  59
  60 /**
  61  * Build LLVM code for texture coord wrapping, for nearest filtering,
  62  * for scaled integer texcoords.
  63  * \param block_length  is the length of the pixel block along the
  64  *                      coordinate axis
  65  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
  66  * \param length  the texture size along one dimension
  67  * \param stride  pixel stride along the coordinate axis (in bytes)
  68  * \param is_pot  if TRUE, length is a power of two
  69  * \param wrap_mode  one of PIPE_TEX_WRAP_x
  70  * \param out_offset  byte offset for the wrapped coordinate
  71  * \param out_i  resulting sub-block pixel coordinate for coord0
  72  */
  73 static void
  74 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
  75                                  unsigned block_length,
  76                                  LLVMValueRef coord,
  77                                  LLVMValueRef length,
  78                                  LLVMValueRef stride,
  79                                  boolean is_pot,
  80                                  unsigned wrap_mode,
  81                                  LLVMValueRef *out_offset,
  82                                  LLVMValueRef *out_i)
  83 {
  84    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
  85    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  86    LLVMValueRef length_minus_one;
  87
  88    length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
  89
  90    switch(wrap_mode) {
  91    case PIPE_TEX_WRAP_REPEAT:
  92       if(is_pot)
  93          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
  94       else {
  95          /* Add a bias to the texcoord to handle negative coords */
  96          LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
  97          coord = LLVMBuildAdd(bld->builder, coord, bias, "");
  98          coord = LLVMBuildURem(bld->builder, coord, length, "");
  99       }
 100       break;
 101
 102    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 103       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
 104       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
 105       break;
 106
 107    case PIPE_TEX_WRAP_CLAMP:
 108    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 109    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 110    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 111    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 112    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 113    default:
 114       assert(0);
 115    }
 116
 117    lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
 118                                   out_offset, out_i);
 119 }
 120
 121
 122 /**
 123  * Build LLVM code for texture coord wrapping, for linear filtering,
 124  * for scaled integer texcoords.
 125  * \param block_length  is the length of the pixel block along the
 126  *                      coordinate axis
 127  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 128  * \param length  the texture size along one dimension
 129  * \param stride  pixel stride along the coordinate axis (in bytes)
 130  * \param is_pot  if TRUE, length is a power of two
 131  * \param wrap_mode  one of PIPE_TEX_WRAP_x
 132  * \param offset0  resulting relative offset for coord0
 133  * \param offset1  resulting relative offset for coord0 + 1
 134  * \param i0  resulting sub-block pixel coordinate for coord0
 135  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 136  */
 137 static void
 138 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 139                                 unsigned block_length,
 140                                 LLVMValueRef coord0,
 141                                 LLVMValueRef length,
 142                                 LLVMValueRef stride,
 143                                 boolean is_pot,
 144                                 unsigned wrap_mode,
 145                                 LLVMValueRef *offset0,
 146                                 LLVMValueRef *offset1,
 147                                 LLVMValueRef *i0,
 148                                 LLVMValueRef *i1)
 149 {
 150    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
 151    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
 152    LLVMValueRef length_minus_one;
 153    LLVMValueRef lmask, umask, mask;
 154
 155    if (block_length != 1) {
 156       /*
 157        * If the pixel block covers more than one pixel then there is no easy
 158        * way to calculate offset1 relative to offset0. Instead, compute them
 159        * independently.
 160        */
 161
 162       LLVMValueRef coord1;
 163
 164       lp_build_sample_wrap_nearest_int(bld,
 165                                        block_length,
 166                                        coord0,
 167                                        length,
 168                                        stride,
 169                                        is_pot,
 170                                        wrap_mode,
 171                                        offset0, i0);
 172
 173       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
 174
 175       lp_build_sample_wrap_nearest_int(bld,
 176                                        block_length,
 177                                        coord1,
 178                                        length,
 179                                        stride,
 180                                        is_pot,
 181                                        wrap_mode,
 182                                        offset1, i1);
 183
 184       return;
 185    }
 186
 187    /*
 188     * Scalar pixels -- try to compute offset0 and offset1 with a single stride
 189     * multiplication.
 190     */
 191
 192    *i0 = uint_coord_bld->zero;
 193    *i1 = uint_coord_bld->zero;
 194
 195    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 196
 197    switch(wrap_mode) {
 198    case PIPE_TEX_WRAP_REPEAT:
 199       if (is_pot) {
 200          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
 201       }
 202       else {
 203          /* Add a bias to the texcoord to handle negative coords */
 204          LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
 205          coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
 206          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
 207       }
 208
 209       mask = lp_build_compare(bld->builder, int_coord_bld->type,
 210                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 211
 212       *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
 213       *offset1 = LLVMBuildAnd(bld->builder,
 214                               lp_build_add(uint_coord_bld, *offset0, stride),
 215                               mask, "");
 216       break;
 217
 218    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 219       lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
 220                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
 221       umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
 222                                PIPE_FUNC_LESS, coord0, length_minus_one);
 223
 224       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
 225       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
 226
 227       mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
 228
 229       *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
 230       *offset1 = lp_build_add(uint_coord_bld,
 231                               *offset0,
 232                               LLVMBuildAnd(bld->builder, stride, mask, ""));
 233       break;
 234
 235    case PIPE_TEX_WRAP_CLAMP:
 236    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 237    case PIPE_TEX_WRAP_MIRROR_REPEAT:
 238    case PIPE_TEX_WRAP_MIRROR_CLAMP:
 239    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 240    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 241    default:
 242       assert(0);
 243       *offset0 = uint_coord_bld->zero;
 244       *offset1 = uint_coord_bld->zero;
 245       break;
 246    }
 247 }
 248
 249
 250 /**
 251  * Sample a single texture image with nearest sampling.
 252  * If sampling a cube texture, r = cube face in [0,5].
 253  * Return filtered color as two vectors of 16-bit fixed point values.
 254  */
 255 static void
 256 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
 257                               LLVMValueRef int_size,
 258                               LLVMValueRef row_stride_vec,
 259                               LLVMValueRef img_stride_vec,
 260                               LLVMValueRef data_ptr,
 261                               LLVMValueRef s,
 262                               LLVMValueRef t,
 263                               LLVMValueRef r,
 264                               LLVMValueRef *colors_lo,
 265                               LLVMValueRef *colors_hi)
 266 {
 267    const unsigned dims = bld->dims;
 268    LLVMBuilderRef builder = bld->builder;
 269    struct lp_build_context i32, h16, u8n;
 270    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
 271    LLVMValueRef i32_c8;
 272    LLVMValueRef width_vec, height_vec, depth_vec;
 273    LLVMValueRef s_ipart, t_ipart, r_ipart;
 274    LLVMValueRef x_stride;
 275    LLVMValueRef x_offset, offset;
 276    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
 277
 278    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
 279    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
 280    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 281
 282    i32_vec_type = lp_build_vec_type(i32.type);
 283    h16_vec_type = lp_build_vec_type(h16.type);
 284    u8n_vec_type = lp_build_vec_type(u8n.type);
 285
 286    lp_build_extract_image_sizes(bld,
 287                                 bld->int_size_type,
 288                                 bld->int_coord_type,
 289                                 int_size,
 290                                 &width_vec,
 291                                 &height_vec,
 292                                 &depth_vec);
 293
 294    if (bld->static_state->normalized_coords) {
 295       LLVMValueRef scaled_size;
 296       LLVMValueRef flt_size;
 297
 298       /* scale size by 256 (8 fractional bits) */
 299       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
 300
 301       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
 302
 303       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
 304    }
 305    else {
 306       /* scale coords by 256 (8 fractional bits) */
 307       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
 308       if (dims >= 2)
 309          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 310       if (dims >= 3)
 311          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
 312    }
 313
 314    /* convert float to int */
 315    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
 316    if (dims >= 2)
 317       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 318    if (dims >= 3)
 319       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
 320
 321    /* compute floor (shift right 8) */
 322    i32_c8 = lp_build_const_int_vec(i32.type, 8);
 323    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
 324    if (dims >= 2)
 325       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 326    if (dims >= 3)
 327       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
 328
 329    /* get pixel, row, image strides */
 330    x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
 331                                  bld->format_desc->block.bits/8);
 332
 333    /* Do texcoord wrapping, compute texel offset */
 334    lp_build_sample_wrap_nearest_int(bld,
 335                                     bld->format_desc->block.width,
 336                                     s_ipart, width_vec, x_stride,
 337                                     bld->static_state->pot_width,
 338                                     bld->static_state->wrap_s,
 339                                     &x_offset, &x_subcoord);
 340    offset = x_offset;
 341    if (dims >= 2) {
 342       LLVMValueRef y_offset;
 343       lp_build_sample_wrap_nearest_int(bld,
 344                                        bld->format_desc->block.height,
 345                                        t_ipart, height_vec, row_stride_vec,
 346                                        bld->static_state->pot_height,
 347                                        bld->static_state->wrap_t,
 348                                        &y_offset, &y_subcoord);
 349       offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
 350       if (dims >= 3) {
 351          LLVMValueRef z_offset;
 352          lp_build_sample_wrap_nearest_int(bld,
 353                                           1, /* block length (depth) */
 354                                           r_ipart, depth_vec, img_stride_vec,
 355                                           bld->static_state->pot_height,
 356                                           bld->static_state->wrap_r,
 357                                           &z_offset, &z_subcoord);
 358          offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
 359       }
 360       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 361          LLVMValueRef z_offset;
 362          /* The r coord is the cube face in [0,5] */
 363          z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
 364          offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
 365       }
 366    }
 367
 368    /*
 369     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 370     *
 371     *   rgba0 rgba1 rgba2 rgba3
 372     *
 373     * bit cast them into 16 x u8
 374     *
 375     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 376     *
 377     * unpack them into two 8 x i16:
 378     *
 379     *   r0 g0 b0 a0 r1 g1 b1 a1
 380     *   r2 g2 b2 a2 r3 g3 b3 a3
 381     *
 382     * The higher 8 bits of the resulting elements will be zero.
 383     */
 384    {
 385       LLVMValueRef rgba8;
 386
 387       if (util_format_is_rgba8_variant(bld->format_desc)) {
 388          /*
 389           * Given the format is a rgba8, just read the pixels as is,
 390           * without any swizzling. Swizzling will be done later.
 391           */
 392          rgba8 = lp_build_gather(bld->builder,
 393                                  bld->texel_type.length,
 394                                  bld->format_desc->block.bits,
 395                                  bld->texel_type.width,
 396                                  data_ptr, offset);
 397
 398          rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 399       }
 400       else {
 401          rgba8 = lp_build_fetch_rgba_aos(bld->builder,
 402                                          bld->format_desc,
 403                                          u8n.type,
 404                                          data_ptr, offset,
 405                                          x_subcoord,
 406                                          y_subcoord);
 407       }
 408
 409       /* Expand one 4*rgba8 to two 2*rgba16 */
 410       lp_build_unpack2(builder, u8n.type, h16.type,
 411                        rgba8,
 412                        colors_lo, colors_hi);
 413    }
 414 }
 415
 416
 417 /**
 418  * Sample a single texture image with (bi-)(tri-)linear sampling.
 419  * Return filtered color as two vectors of 16-bit fixed point values.
 420  */
 421 static void
 422 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 423                              LLVMValueRef int_size,
 424                              LLVMValueRef row_stride_vec,
 425                              LLVMValueRef img_stride_vec,
 426                              LLVMValueRef data_ptr,
 427                              LLVMValueRef s,
 428                              LLVMValueRef t,
 429                              LLVMValueRef r,
 430                              LLVMValueRef *colors_lo,
 431                              LLVMValueRef *colors_hi)
 432 {
 433    const unsigned dims = bld->dims;
 434    LLVMBuilderRef builder = bld->builder;
 435    struct lp_build_context i32, h16, u8n;
 436    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
 437    LLVMValueRef i32_c8, i32_c128, i32_c255;
 438    LLVMValueRef width_vec, height_vec, depth_vec;
 439    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
 440    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
 441    LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
 442    LLVMValueRef x_stride, y_stride, z_stride;
 443    LLVMValueRef x_offset0, x_offset1;
 444    LLVMValueRef y_offset0, y_offset1;
 445    LLVMValueRef z_offset0, z_offset1;
 446    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
 447    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
 448    LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
 449    LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
 450    LLVMValueRef packed_lo, packed_hi;
 451    unsigned x, y, z;
 452    unsigned i, j, k;
 453    unsigned numj, numk;
 454
 455    lp_build_context_init(&i32, builder, lp_type_int_vec(32));
 456    lp_build_context_init(&h16, builder, lp_type_ufixed(16));
 457    lp_build_context_init(&u8n, builder, lp_type_unorm(8));
 458
 459    i32_vec_type = lp_build_vec_type(i32.type);
 460    h16_vec_type = lp_build_vec_type(h16.type);
 461    u8n_vec_type = lp_build_vec_type(u8n.type);
 462
 463    lp_build_extract_image_sizes(bld,
 464                                 bld->int_size_type,
 465                                 bld->int_coord_type,
 466                                 int_size,
 467                                 &width_vec,
 468                                 &height_vec,
 469                                 &depth_vec);
 470
 471    if (bld->static_state->normalized_coords) {
 472       LLVMValueRef scaled_size;
 473       LLVMValueRef flt_size;
 474
 475       /* scale size by 256 (8 fractional bits) */
 476       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
 477
 478       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
 479
 480       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
 481    }
 482    else {
 483       /* scale coords by 256 (8 fractional bits) */
 484       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
 485       if (dims >= 2)
 486          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 487       if (dims >= 3)
 488          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
 489    }
 490
 491    /* convert float to int */
 492    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
 493    if (dims >= 2)
 494       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 495    if (dims >= 3)
 496       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
 497
 498    /* subtract 0.5 (add -128) */
 499    i32_c128 = lp_build_const_int_vec(i32.type, -128);
 500    s = LLVMBuildAdd(builder, s, i32_c128, "");
 501    if (dims >= 2) {
 502       t = LLVMBuildAdd(builder, t, i32_c128, "");
 503    }
 504    if (dims >= 3) {
 505       r = LLVMBuildAdd(builder, r, i32_c128, "");
 506    }
 507
 508    /* compute floor (shift right 8) */
 509    i32_c8 = lp_build_const_int_vec(i32.type, 8);
 510    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
 511    if (dims >= 2)
 512       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 513    if (dims >= 3)
 514       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
 515
 516    /* compute fractional part (AND with 0xff) */
 517    i32_c255 = lp_build_const_int_vec(i32.type, 255);
 518    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
 519    if (dims >= 2)
 520       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
 521    if (dims >= 3)
 522       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
 523
 524    /* get pixel, row and image strides */
 525    x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
 526                                  bld->format_desc->block.bits/8);
 527    y_stride = row_stride_vec;
 528    z_stride = img_stride_vec;
 529
 530    /* do texcoord wrapping and compute texel offsets */
 531    lp_build_sample_wrap_linear_int(bld,
 532                                    bld->format_desc->block.width,
 533                                    s_ipart, width_vec, x_stride,
 534                                    bld->static_state->pot_width,
 535                                    bld->static_state->wrap_s,
 536                                    &x_offset0, &x_offset1,
 537                                    &x_subcoord[0], &x_subcoord[1]);
 538    for (z = 0; z < 2; z++) {
 539       for (y = 0; y < 2; y++) {
 540          offset[z][y][0] = x_offset0;
 541          offset[z][y][1] = x_offset1;
 542       }
 543    }
 544
 545    if (dims >= 2) {
 546       lp_build_sample_wrap_linear_int(bld,
 547                                       bld->format_desc->block.height,
 548                                       t_ipart, height_vec, y_stride,
 549                                       bld->static_state->pot_height,
 550                                       bld->static_state->wrap_t,
 551                                       &y_offset0, &y_offset1,
 552                                       &y_subcoord[0], &y_subcoord[1]);
 553
 554       for (z = 0; z < 2; z++) {
 555          for (x = 0; x < 2; x++) {
 556             offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
 557                                            offset[z][0][x], y_offset0);
 558             offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
 559                                            offset[z][1][x], y_offset1);
 560          }
 561       }
 562    }
 563
 564    if (dims >= 3) {
 565       lp_build_sample_wrap_linear_int(bld,
 566                                       bld->format_desc->block.height,
 567                                       r_ipart, depth_vec, z_stride,
 568                                       bld->static_state->pot_depth,
 569                                       bld->static_state->wrap_r,
 570                                       &z_offset0, &z_offset1,
 571                                       &z_subcoord[0], &z_subcoord[1]);
 572       for (y = 0; y < 2; y++) {
 573          for (x = 0; x < 2; x++) {
 574             offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
 575                                            offset[0][y][x], z_offset0);
 576             offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
 577                                            offset[1][y][x], z_offset1);
 578          }
 579       }
 580    }
 581    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 582       LLVMValueRef z_offset;
 583       z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
 584       for (y = 0; y < 2; y++) {
 585          for (x = 0; x < 2; x++) {
 586             /* The r coord is the cube face in [0,5] */
 587             offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
 588                                            offset[0][y][x], z_offset);
 589          }
 590       }
 591    }
 592
 593    /*
 594     * Transform 4 x i32 in
 595     *
 596     *   s_fpart = {s0, s1, s2, s3}
 597     *
 598     * into 8 x i16
 599     *
 600     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
 601     *
 602     * into two 8 x i16
 603     *
 604     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
 605     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
 606     *
 607     * and likewise for t_fpart. There is no risk of loosing precision here
 608     * since the fractional parts only use the lower 8bits.
 609     */
 610    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
 611    if (dims >= 2)
 612       t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
 613    if (dims >= 3)
 614       r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
 615
 616    {
 617       LLVMTypeRef elem_type = LLVMInt32Type();
 618       LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
 619       LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
 620       LLVMValueRef shuffle_lo;
 621       LLVMValueRef shuffle_hi;
 622
 623       for (j = 0; j < h16.type.length; j += 4) {
 624 #ifdef PIPE_ARCH_LITTLE_ENDIAN
 625          unsigned subindex = 0;
 626 #else
 627          unsigned subindex = 1;
 628 #endif
 629          LLVMValueRef index;
 630
 631          index = LLVMConstInt(elem_type, j/2 + subindex, 0);
 632          for (i = 0; i < 4; ++i)
 633             shuffles_lo[j + i] = index;
 634
 635          index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
 636          for (i = 0; i < 4; ++i)
 637             shuffles_hi[j + i] = index;
 638       }
 639
 640       shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
 641       shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
 642
 643       s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
 644                                           shuffle_lo, "");
 645       s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
 646                                           shuffle_hi, "");
 647       if (dims >= 2) {
 648          t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
 649                                              shuffle_lo, "");
 650          t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
 651                                              shuffle_hi, "");
 652       }
 653       if (dims >= 3) {
 654          r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
 655                                              shuffle_lo, "");
 656          r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
 657                                              shuffle_hi, "");
 658       }
 659    }
 660
 661    /*
 662     * Fetch the pixels as 4 x 32bit (rgba order might differ):
 663     *
 664     *   rgba0 rgba1 rgba2 rgba3
 665     *
 666     * bit cast them into 16 x u8
 667     *
 668     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
 669     *
 670     * unpack them into two 8 x i16:
 671     *
 672     *   r0 g0 b0 a0 r1 g1 b1 a1
 673     *   r2 g2 b2 a2 r3 g3 b3 a3
 674     *
 675     * The higher 8 bits of the resulting elements will be zero.
 676     */
 677    numj = 1 + (dims >= 2);
 678    numk = 1 + (dims >= 3);
 679
 680    for (k = 0; k < numk; k++) {
 681       for (j = 0; j < numj; j++) {
 682          for (i = 0; i < 2; i++) {
 683             LLVMValueRef rgba8;
 684
 685             if (util_format_is_rgba8_variant(bld->format_desc)) {
 686                /*
 687                 * Given the format is a rgba8, just read the pixels as is,
 688                 * without any swizzling. Swizzling will be done later.
 689                 */
 690                rgba8 = lp_build_gather(bld->builder,
 691                                        bld->texel_type.length,
 692                                        bld->format_desc->block.bits,
 693                                        bld->texel_type.width,
 694                                        data_ptr, offset[k][j][i]);
 695
 696                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
 697             }
 698             else {
 699                rgba8 = lp_build_fetch_rgba_aos(bld->builder,
 700                                                bld->format_desc,
 701                                                u8n.type,
 702                                                data_ptr, offset[k][j][i],
 703                                                x_subcoord[i],
 704                                                y_subcoord[j]);
 705             }
 706
 707             /* Expand one 4*rgba8 to two 2*rgba16 */
 708             lp_build_unpack2(builder, u8n.type, h16.type,
 709                              rgba8,
 710                              &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
 711          }
 712       }
 713    }
 714
 715    /*
 716     * Linear interpolation with 8.8 fixed point.
 717     */
 718    if (dims == 1) {
 719       /* 1-D lerp */
 720       packed_lo = lp_build_lerp(&h16,
 721                                 s_fpart_lo,
 722                                 neighbors_lo[0][0][0],
 723                                 neighbors_lo[0][0][1]);
 724
 725       packed_hi = lp_build_lerp(&h16,
 726                                 s_fpart_hi,
 727                                 neighbors_hi[0][0][0],
 728                                 neighbors_hi[0][0][1]);
 729    }
 730    else {
 731       /* 2-D lerp */
 732       packed_lo = lp_build_lerp_2d(&h16,
 733                                    s_fpart_lo, t_fpart_lo,
 734                                    neighbors_lo[0][0][0],
 735                                    neighbors_lo[0][0][1],
 736                                    neighbors_lo[0][1][0],
 737                                    neighbors_lo[0][1][1]);
 738
 739       packed_hi = lp_build_lerp_2d(&h16,
 740                                    s_fpart_hi, t_fpart_hi,
 741                                    neighbors_hi[0][0][0],
 742                                    neighbors_hi[0][0][1],
 743                                    neighbors_hi[0][1][0],
 744                                    neighbors_hi[0][1][1]);
 745
 746       if (dims >= 3) {
 747          LLVMValueRef packed_lo2, packed_hi2;
 748
 749          /* lerp in the second z slice */
 750          packed_lo2 = lp_build_lerp_2d(&h16,
 751                                        s_fpart_lo, t_fpart_lo,
 752                                        neighbors_lo[1][0][0],
 753                                        neighbors_lo[1][0][1],
 754                                        neighbors_lo[1][1][0],
 755                                        neighbors_lo[1][1][1]);
 756
 757          packed_hi2 = lp_build_lerp_2d(&h16,
 758                                        s_fpart_hi, t_fpart_hi,
 759                                        neighbors_hi[1][0][0],
 760                                        neighbors_hi[1][0][1],
 761                                        neighbors_hi[1][1][0],
 762                                        neighbors_hi[1][1][1]);
 763          /* interp between two z slices */
 764          packed_lo = lp_build_lerp(&h16, r_fpart_lo,
 765                                    packed_lo, packed_lo2);
 766          packed_hi = lp_build_lerp(&h16, r_fpart_hi,
 767                                    packed_hi, packed_hi2);
 768       }
 769    }
 770
 771    *colors_lo = packed_lo;
 772    *colors_hi = packed_hi;
 773 }
 774
 775
 776 /**
 777  * Sample the texture/mipmap using given image filter and mip filter.
 778  * data0_ptr and data1_ptr point to the two mipmap levels to sample
 779  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 780  * If we're using nearest miplevel sampling the '1' values will be null/unused.
 781  */
 782 static void
 783 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 784                        unsigned img_filter,
 785                        unsigned mip_filter,
 786                        LLVMValueRef s,
 787                        LLVMValueRef t,
 788                        LLVMValueRef r,
 789                        LLVMValueRef ilevel0,
 790                        LLVMValueRef ilevel1,
 791                        LLVMValueRef lod_fpart,
 792                        LLVMValueRef colors_lo_var,
 793                        LLVMValueRef colors_hi_var)
 794 {
 795    LLVMBuilderRef builder = bld->builder;
 796    LLVMValueRef size0;
 797    LLVMValueRef size1;
 798    LLVMValueRef row_stride0_vec;
 799    LLVMValueRef row_stride1_vec;
 800    LLVMValueRef img_stride0_vec;
 801    LLVMValueRef img_stride1_vec;
 802    LLVMValueRef data_ptr0;
 803    LLVMValueRef data_ptr1;
 804    LLVMValueRef colors0_lo, colors0_hi;
 805    LLVMValueRef colors1_lo, colors1_hi;
 806
 807
 808    /* sample the first mipmap level */
 809    lp_build_mipmap_level_sizes(bld, ilevel0,
 810                                &size0,
 811                                &row_stride0_vec, &img_stride0_vec);
 812    data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
 813    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
 814       lp_build_sample_image_nearest(bld,
 815                                     size0,
 816                                     row_stride0_vec, img_stride0_vec,
 817                                     data_ptr0, s, t, r,
 818                                     &colors0_lo, &colors0_hi);
 819    }
 820    else {
 821       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
 822       lp_build_sample_image_linear(bld,
 823                                    size0,
 824                                    row_stride0_vec, img_stride0_vec,
 825                                    data_ptr0, s, t, r,
 826                                    &colors0_lo, &colors0_hi);
 827    }
 828
 829    /* Store the first level's colors in the output variables */
 830    LLVMBuildStore(builder, colors0_lo, colors_lo_var);
 831    LLVMBuildStore(builder, colors0_hi, colors_hi_var);
 832
 833    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
 834       LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
 835       LLVMTypeRef i32_type = LLVMIntType(32);
 836       struct lp_build_flow_context *flow_ctx;
 837       struct lp_build_if_state if_ctx;
 838       LLVMValueRef need_lerp;
 839
 840       flow_ctx = lp_build_flow_create(builder);
 841
 842       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
 843       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
 844
 845       /* need_lerp = lod_fpart > 0 */
 846       need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
 847                                 lod_fpart, LLVMConstNull(i32_type),
 848                                 "need_lerp");
 849
 850       lp_build_if(&if_ctx, flow_ctx, builder, need_lerp);
 851       {
 852          struct lp_build_context h16_bld;
 853
 854          lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
 855
 856          /* sample the second mipmap level */
 857          lp_build_mipmap_level_sizes(bld, ilevel1,
 858                                      &size1,
 859                                      &row_stride1_vec, &img_stride1_vec);
 860          data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
 861          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
 862             lp_build_sample_image_nearest(bld,
 863                                           size1,
 864                                           row_stride1_vec, img_stride1_vec,
 865                                           data_ptr1, s, t, r,
 866                                           &colors1_lo, &colors1_hi);
 867          }
 868          else {
 869             lp_build_sample_image_linear(bld,
 870                                          size1,
 871                                          row_stride1_vec, img_stride1_vec,
 872                                          data_ptr1, s, t, r,
 873                                          &colors1_lo, &colors1_hi);
 874          }
 875
 876          /* interpolate samples from the two mipmap levels */
 877
 878          lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
 879          lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
 880
 881          colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
 882                                     colors0_lo, colors1_lo);
 883          colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
 884                                     colors0_hi, colors1_hi);
 885
 886          LLVMBuildStore(builder, colors0_lo, colors_lo_var);
 887          LLVMBuildStore(builder, colors0_hi, colors_hi_var);
 888       }
 889       lp_build_endif(&if_ctx);
 890
 891       lp_build_flow_destroy(flow_ctx);
 892    }
 893 }
 894
 895
 896
 897 /**
 898  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
 899  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
 900  * but only limited texture coord wrap modes.
 901  */
 902 void
 903 lp_build_sample_aos(struct lp_build_sample_context *bld,
 904                     unsigned unit,
 905                     LLVMValueRef s,
 906                     LLVMValueRef t,
 907                     LLVMValueRef r,
 908                     const LLVMValueRef *ddx,
 909                     const LLVMValueRef *ddy,
 910                     LLVMValueRef lod_bias, /* optional */
 911                     LLVMValueRef explicit_lod, /* optional */
 912                     LLVMValueRef texel_out[4])
 913 {
 914    struct lp_build_context *int_bld = &bld->int_bld;
 915    LLVMBuilderRef builder = bld->builder;
 916    const unsigned mip_filter = bld->static_state->min_mip_filter;
 917    const unsigned min_filter = bld->static_state->min_img_filter;
 918    const unsigned mag_filter = bld->static_state->mag_img_filter;
 919    const unsigned dims = bld->dims;
 920    LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
 921    LLVMValueRef ilevel0, ilevel1 = NULL;
 922    LLVMValueRef packed, packed_lo, packed_hi;
 923    LLVMValueRef unswizzled[4];
 924    LLVMValueRef face_ddx[4], face_ddy[4];
 925    struct lp_build_context h16_bld;
 926    LLVMTypeRef i32t = LLVMInt32Type();
 927    LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
 928
 929    /* we only support the common/simple wrap modes at this time */
 930    assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
 931    if (dims >= 2)
 932       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
 933    if (dims >= 3)
 934       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
 935
 936
 937    /* make 16-bit fixed-pt builder context */
 938    lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
 939
 940    /* cube face selection, compute pre-face coords, etc. */
 941    if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 942       LLVMValueRef face, face_s, face_t;
 943       lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
 944       s = face_s; /* vec */
 945       t = face_t; /* vec */
 946       /* use 'r' to indicate cube face */
 947       r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
 948
 949       /* recompute ddx, ddy using the new (s,t) face texcoords */
 950       face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
 951       face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
 952       face_ddx[2] = NULL;
 953       face_ddx[3] = NULL;
 954       face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
 955       face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
 956       face_ddy[2] = NULL;
 957       face_ddy[3] = NULL;
 958       ddx = face_ddx;
 959       ddy = face_ddy;
 960    }
 961
 962    /*
 963     * Compute the level of detail (float).
 964     */
 965    if (min_filter != mag_filter ||
 966        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
 967       /* Need to compute lod either to choose mipmap levels or to
 968        * distinguish between minification/magnification with one mipmap level.
 969        */
 970       lp_build_lod_selector(bld, unit, ddx, ddy,
 971                             lod_bias, explicit_lod,
 972                             mip_filter,
 973                             &lod_ipart, &lod_fpart);
 974    } else {
 975       lod_ipart = i32t_zero;
 976    }
 977
 978    /*
 979     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
 980     */
 981    switch (mip_filter) {
 982    default:
 983       assert(0 && "bad mip_filter value in lp_build_sample_aos()");
 984       /* fall-through */
 985    case PIPE_TEX_MIPFILTER_NONE:
 986       /* always use mip level 0 */
 987       if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
 988          /* XXX this is a work-around for an apparent bug in LLVM 2.7.
 989           * We should be able to set ilevel0 = const(0) but that causes
 990           * bad x86 code to be emitted.
 991           */
 992          assert(lod_ipart);
 993          lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
 994       }
 995       else {
 996          ilevel0 = i32t_zero;
 997       }
 998       break;
 999    case PIPE_TEX_MIPFILTER_NEAREST:
1000       assert(lod_ipart);
1001       lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1002       break;
1003    case PIPE_TEX_MIPFILTER_LINEAR:
1004       assert(lod_ipart);
1005       assert(lod_fpart);
1006       lp_build_linear_mip_levels(bld, unit,
1007                                  lod_ipart, &lod_fpart,
1008                                  &ilevel0, &ilevel1);
1009       break;
1010    }
1011
1012    /*
1013     * Get/interpolate texture colors.
1014     */
1015
1016    packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
1017    packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
1018
1019    if (min_filter == mag_filter) {
1020       /* no need to distinquish between minification and magnification */
1021       lp_build_sample_mipmap(bld,
1022                              min_filter, mip_filter,
1023                              s, t, r,
1024                              ilevel0, ilevel1, lod_fpart,
1025                              packed_lo, packed_hi);
1026    }
1027    else {
1028       /* Emit conditional to choose min image filter or mag image filter
1029        * depending on the lod being > 0 or <= 0, respectively.
1030        */
1031       struct lp_build_flow_context *flow_ctx;
1032       struct lp_build_if_state if_ctx;
1033       LLVMValueRef minify;
1034
1035       flow_ctx = lp_build_flow_create(builder);
1036
1037       /* minify = lod >= 0.0 */
1038       minify = LLVMBuildICmp(builder, LLVMIntSGE,
1039                              lod_ipart, int_bld->zero, "");
1040
1041       lp_build_if(&if_ctx, flow_ctx, builder, minify);
1042       {
1043          /* Use the minification filter */
1044          lp_build_sample_mipmap(bld,
1045                                 min_filter, mip_filter,
1046                                 s, t, r,
1047                                 ilevel0, ilevel1, lod_fpart,
1048                                 packed_lo, packed_hi);
1049       }
1050       lp_build_else(&if_ctx);
1051       {
1052          /* Use the magnification filter */
1053          lp_build_sample_mipmap(bld,
1054                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1055                                 s, t, r,
1056                                 i32t_zero, NULL, NULL,
1057                                 packed_lo, packed_hi);
1058       }
1059       lp_build_endif(&if_ctx);
1060
1061       lp_build_flow_destroy(flow_ctx);
1062    }
1063
1064    /*
1065     * combine the values stored in 'packed_lo' and 'packed_hi' variables
1066     * into 'packed'
1067     */
1068    packed = lp_build_pack2(builder,
1069                            h16_bld.type, lp_type_unorm(8),
1070                            LLVMBuildLoad(builder, packed_lo, ""),
1071                            LLVMBuildLoad(builder, packed_hi, ""));
1072
1073    /*
1074     * Convert to SoA and swizzle.
1075     */
1076    lp_build_rgba8_to_f32_soa(builder,
1077                              bld->texel_type,
1078                              packed, unswizzled);
1079
1080    if (util_format_is_rgba8_variant(bld->format_desc)) {
1081       lp_build_format_swizzle_soa(bld->format_desc,
1082                                   &bld->texel_bld,
1083                                   unswizzled, texel_out);
1084    }
1085    else {
1086       texel_out[0] = unswizzled[0];
1087       texel_out[1] = unswizzled[1];
1088       texel_out[2] = unswizzled[2];
1089       texel_out[3] = unswizzled[3];
1090    }
1091
1092    apply_sampler_swizzle(bld, texel_out);
1093 }