src/gallium/drivers/llvmpipe/lp_bld_interp.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * Copyright 2007-2008 VMware, Inc.
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Position and shader input interpolation.
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  */
  35
  36 #include "pipe/p_shader_tokens.h"
  37 #include "util/u_debug.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_math.h"
  40 #include "tgsi/tgsi_scan.h"
  41 #include "gallivm/lp_bld_debug.h"
  42 #include "gallivm/lp_bld_const.h"
  43 #include "gallivm/lp_bld_arit.h"
  44 #include "gallivm/lp_bld_swizzle.h"
  45 #include "gallivm/lp_bld_flow.h"
  46 #include "gallivm/lp_bld_logic.h"
  47 #include "gallivm/lp_bld_struct.h"
  48 #include "gallivm/lp_bld_gather.h"
  49 #include "lp_bld_interp.h"
  50
  51
  52 /*
  53  * The shader JIT function operates on blocks of quads.
  54  * Each block has 2x2 quads and each quad has 2x2 pixels.
  55  *
  56  * We iterate over the quads in order 0, 1, 2, 3:
  57  *
  58  * #################
  59  * #   |   #   |   #
  60  * #---0---#---1---#
  61  * #   |   #   |   #
  62  * #################
  63  * #   |   #   |   #
  64  * #---2---#---3---#
  65  * #   |   #   |   #
  66  * #################
  67  *
  68  * If we iterate over multiple quads at once, quads 01 and 23 are processed
  69  * together.
  70  *
  71  * Within each quad, we have four pixels which are represented in SOA
  72  * order:
  73  *
  74  * #########
  75  * # 0 | 1 #
  76  * #---+---#
  77  * # 2 | 3 #
  78  * #########
  79  *
  80  * So the green channel (for example) of the four pixels is stored in
  81  * a single vector register: {g0, g1, g2, g3}.
  82  * The order stays the same even with multiple quads:
  83  * 0 1 4 5
  84  * 2 3 6 7
  85  * is stored as g0..g7
  86  */
  87
  88
  89 /**
  90  * Do one perspective divide per quad.
  91  *
  92  * For perspective interpolation, the final attribute value is given
  93  *
  94  *  a' = a/w = a * oow
  95  *
  96  * where
  97  *
  98  *  a = a0 + dadx*x + dady*y
  99  *  w = w0 + dwdx*x + dwdy*y
 100  *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 101  *
 102  * Instead of computing the division per pixel, with this macro we compute the
 103  * division on the upper left pixel of each quad, and use a linear
 104  * approximation in the remaining pixels, given by:
 105  *
 106  *  da'dx = (dadx - dwdx*a)*oow
 107  *  da'dy = (dady - dwdy*a)*oow
 108  *
 109  * Ironically, this actually makes things slower -- probably because the
 110  * divide hardware unit is rarely used, whereas the multiply unit is typically
 111  * already saturated.
 112  */
 113 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
 114
 115
 116 static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
 117 static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
 118
 119
 120 static void
 121 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
 122 {
 123    if(attrib == 0)
 124       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
 125    else
 126       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
 127 }
 128
 129 static void
 130 calc_offsets(struct lp_build_context *coeff_bld,
 131              unsigned quad_start_index,
 132              LLVMValueRef *pixoffx,
 133              LLVMValueRef *pixoffy)
 134 {
 135    unsigned i;
 136    unsigned num_pix = coeff_bld->type.length;
 137    struct gallivm_state *gallivm = coeff_bld->gallivm;
 138    LLVMBuilderRef builder = coeff_bld->gallivm->builder;
 139    LLVMValueRef nr, pixxf, pixyf;
 140
 141    *pixoffx = coeff_bld->undef;
 142    *pixoffy = coeff_bld->undef;
 143
 144    for (i = 0; i < num_pix; i++) {
 145       nr = lp_build_const_int32(gallivm, i);
 146       pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
 147                                    (quad_start_index & 1) * 2);
 148       pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
 149                                    (quad_start_index & 2));
 150       *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
 151       *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
 152    }
 153 }
 154
 155 static void
 156 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
 157                       struct gallivm_state *gallivm,
 158                       LLVMValueRef loop_iter,
 159                       LLVMValueRef mask_store,
 160                       LLVMValueRef pix_center_offset,
 161                       LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
 162 {
 163    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 164    LLVMBuilderRef builder = gallivm->builder;
 165    LLVMValueRef s_mask_and = NULL;
 166    LLVMValueRef centroid_x_offset = pix_center_offset;
 167    LLVMValueRef centroid_y_offset = pix_center_offset;
 168    for (int s = bld->coverage_samples - 1; s >= 0; s--) {
 169       LLVMValueRef sample_cov;
 170       LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
 171
 172       s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
 173       sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
 174       if (s == bld->coverage_samples - 1)
 175          s_mask_and = sample_cov;
 176       else
 177          s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
 178
 179       LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
 180       LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
 181
 182       x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
 183       y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
 184       x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
 185       y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
 186       centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
 187       centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
 188    }
 189    *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
 190    *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
 191 }
 192
 193 /* Much easier, and significantly less instructions in the per-stamp
 194  * part (less than half) but overall more instructions so a loss if
 195  * most quads are active. Might be a win though with larger vectors.
 196  * No ability to do per-quad divide (doable but not implemented)
 197  * Could be made to work with passed in pixel offsets (i.e. active quad merging).
 198  */
 199 static void
 200 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
 201                    LLVMValueRef a0_ptr,
 202                    LLVMValueRef dadx_ptr,
 203                    LLVMValueRef dady_ptr)
 204 {
 205    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 206    struct lp_build_context *setup_bld = &bld->setup_bld;
 207    struct gallivm_state *gallivm = coeff_bld->gallivm;
 208    LLVMBuilderRef builder = gallivm->builder;
 209    unsigned attrib;
 210
 211    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 212       /*
 213        * always fetch all 4 values for performance/simplicity
 214        * Note: we do that here because it seems to generate better
 215        * code. It generates a lot of moves initially but less
 216        * moves later. As far as I can tell this looks like a
 217        * llvm issue, instead of simply reloading the values from
 218        * the passed in pointers it if it runs out of registers
 219        * it spills/reloads them. Maybe some optimization passes
 220        * would help.
 221        * Might want to investigate this again later.
 222        */
 223       const unsigned interp = bld->interp[attrib];
 224       LLVMValueRef index = lp_build_const_int32(gallivm,
 225                                 attrib * TGSI_NUM_CHANNELS);
 226       LLVMValueRef ptr;
 227       LLVMValueRef dadxaos = setup_bld->zero;
 228       LLVMValueRef dadyaos = setup_bld->zero;
 229       LLVMValueRef a0aos = setup_bld->zero;
 230
 231       switch (interp) {
 232       case LP_INTERP_PERSPECTIVE:
 233          /* fall-through */
 234
 235       case LP_INTERP_LINEAR:
 236          ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
 237          ptr = LLVMBuildBitCast(builder, ptr,
 238                LLVMPointerType(setup_bld->vec_type, 0), "");
 239          dadxaos = LLVMBuildLoad(builder, ptr, "");
 240
 241          ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
 242          ptr = LLVMBuildBitCast(builder, ptr,
 243                LLVMPointerType(setup_bld->vec_type, 0), "");
 244          dadyaos = LLVMBuildLoad(builder, ptr, "");
 245
 246          attrib_name(dadxaos, attrib, 0, ".dadxaos");
 247          attrib_name(dadyaos, attrib, 0, ".dadyaos");
 248          /* fall-through */
 249
 250       case LP_INTERP_CONSTANT:
 251       case LP_INTERP_FACING:
 252          ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
 253          ptr = LLVMBuildBitCast(builder, ptr,
 254                LLVMPointerType(setup_bld->vec_type, 0), "");
 255          a0aos = LLVMBuildLoad(builder, ptr, "");
 256          attrib_name(a0aos, attrib, 0, ".a0aos");
 257          break;
 258
 259       case LP_INTERP_POSITION:
 260          /* Nothing to do as the position coeffs are already setup in slot 0 */
 261          continue;
 262
 263       default:
 264          assert(0);
 265          break;
 266       }
 267       bld->a0aos[attrib] = a0aos;
 268       bld->dadxaos[attrib] = dadxaos;
 269       bld->dadyaos[attrib] = dadyaos;
 270    }
 271 }
 272
 273 /**
 274  * Interpolate the shader input attribute values.
 275  * This is called for each (group of) quad(s).
 276  */
 277 static void
 278 attribs_update_simple(struct lp_build_interp_soa_context *bld,
 279                       struct gallivm_state *gallivm,
 280                       LLVMValueRef loop_iter,
 281                       LLVMValueRef mask_store,
 282                       LLVMValueRef sample_id,
 283                       int start,
 284                       int end)
 285 {
 286    LLVMBuilderRef builder = gallivm->builder;
 287    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 288    struct lp_build_context *setup_bld = &bld->setup_bld;
 289    LLVMValueRef oow = NULL;
 290    unsigned attrib;
 291    LLVMValueRef pixoffx;
 292    LLVMValueRef pixoffy;
 293    LLVMValueRef ptr;
 294    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
 295
 296    /* could do this with code-generated passed in pixel offsets too */
 297
 298    assert(loop_iter);
 299    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
 300    pixoffx = LLVMBuildLoad(builder, ptr, "");
 301    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
 302    pixoffy = LLVMBuildLoad(builder, ptr, "");
 303
 304    pixoffx = LLVMBuildFAdd(builder, pixoffx,
 305                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
 306    pixoffy = LLVMBuildFAdd(builder, pixoffy,
 307                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
 308
 309    for (attrib = start; attrib < end; attrib++) {
 310       const unsigned mask = bld->mask[attrib];
 311       const unsigned interp = bld->interp[attrib];
 312       const unsigned loc = bld->interp_loc[attrib];
 313       unsigned chan;
 314
 315       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 316          if (mask & (1 << chan)) {
 317             LLVMValueRef index;
 318             LLVMValueRef dadx = coeff_bld->zero;
 319             LLVMValueRef dady = coeff_bld->zero;
 320             LLVMValueRef a = coeff_bld->zero;
 321             LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
 322
 323             index = lp_build_const_int32(gallivm, chan);
 324             switch (interp) {
 325             case LP_INTERP_PERSPECTIVE:
 326                /* fall-through */
 327
 328             case LP_INTERP_LINEAR:
 329                if (attrib == 0 && chan == 0) {
 330                   dadx = coeff_bld->one;
 331                   if (sample_id) {
 332                      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
 333                      x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
 334                      a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
 335                   } else {
 336                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
 337                   }
 338                }
 339                else if (attrib == 0 && chan == 1) {
 340                   dady = coeff_bld->one;
 341                   if (sample_id) {
 342                      LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
 343                      y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
 344                      y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
 345                      a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
 346                   } else {
 347                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
 348                   }
 349                }
 350                else {
 351                   dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
 352                                                     coeff_bld->type, bld->dadxaos[attrib],
 353                                                     index);
 354                   dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
 355                                                     coeff_bld->type, bld->dadyaos[attrib],
 356                                                     index);
 357                   a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 358                                                  coeff_bld->type, bld->a0aos[attrib],
 359                                                  index);
 360
 361                   if (bld->coverage_samples > 1) {
 362                      LLVMValueRef xoffset = pix_center_offset;
 363                      LLVMValueRef yoffset = pix_center_offset;
 364                      if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
 365                         LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
 366                         LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
 367
 368                         x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
 369                         y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
 370                         xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
 371                         yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
 372                      } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
 373                         calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
 374                                               pix_center_offset, &xoffset, &yoffset);
 375                      }
 376                      chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
 377                      chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
 378                   }
 379                }
 380
 381                /*
 382                 * a = a0 + (x * dadx + y * dady)
 383                 */
 384                a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
 385                a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
 386
 387                if (interp == LP_INTERP_PERSPECTIVE) {
 388                   if (oow == NULL) {
 389                      LLVMValueRef w = bld->attribs[0][3];
 390                      assert(attrib != 0);
 391                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
 392                      oow = lp_build_rcp(coeff_bld, w);
 393                   }
 394                   a = lp_build_mul(coeff_bld, a, oow);
 395                }
 396                break;
 397
 398             case LP_INTERP_CONSTANT:
 399             case LP_INTERP_FACING:
 400                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 401                                               coeff_bld->type, bld->a0aos[attrib],
 402                                               index);
 403                break;
 404
 405             case LP_INTERP_POSITION:
 406                assert(attrib > 0);
 407                a = bld->attribs[0][chan];
 408                break;
 409
 410             default:
 411                assert(0);
 412                break;
 413             }
 414
 415             if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
 416                /* FIXME: Depth values can exceed 1.0, due to the fact that
 417                 * setup interpolation coefficients refer to (0,0) which causes
 418                 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
 419                 * Note though values outside [0,1] are perfectly valid with
 420                 * depth clip disabled.
 421                 * XXX: If depth clip is disabled but we force depth clamp
 422                 * we may get values larger than 1.0 in the fs (but not in
 423                 * depth test). Not sure if that's an issue...
 424                 * Also, on a similar note, it is not obvious if the depth values
 425                 * appearing in fs (with depth clip disabled) should be clamped
 426                 * to [0,1], clamped to near/far or not be clamped at all...
 427                 */
 428                a = lp_build_min(coeff_bld, a, coeff_bld->one);
 429             }
 430             bld->attribs[attrib][chan] = a;
 431          }
 432       }
 433    }
 434 }
 435
 436 static LLVMValueRef
 437 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
 438                              struct gallivm_state *gallivm,
 439                              unsigned attrib, unsigned chan,
 440                              LLVMValueRef indir_index,
 441                              LLVMValueRef pixoffx,
 442                              LLVMValueRef pixoffy)
 443 {
 444    LLVMBuilderRef builder = gallivm->builder;
 445    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 446    const unsigned interp = bld->interp[attrib];
 447    LLVMValueRef dadx = coeff_bld->zero;
 448    LLVMValueRef dady = coeff_bld->zero;
 449    LLVMValueRef a = coeff_bld->zero;
 450
 451    LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
 452
 453    indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
 454    LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
 455    index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
 456
 457    /* size up to byte indices */
 458    index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
 459
 460    struct lp_type dst_type = coeff_bld->type;
 461    dst_type.length = 1;
 462    switch (interp) {
 463    case LP_INTERP_PERSPECTIVE:
 464       /* fall-through */
 465    case LP_INTERP_LINEAR:
 466
 467       dadx = lp_build_gather(gallivm, coeff_bld->type.length,
 468                              coeff_bld->type.width, dst_type,
 469                              true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
 470
 471       dady = lp_build_gather(gallivm, coeff_bld->type.length,
 472                              coeff_bld->type.width, dst_type,
 473                              true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
 474
 475       a = lp_build_gather(gallivm, coeff_bld->type.length,
 476                           coeff_bld->type.width, dst_type,
 477                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
 478
 479       /*
 480        * a = a0 + (x * dadx + y * dady)
 481        */
 482       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
 483       a = lp_build_fmuladd(builder, dady, pixoffy, a);
 484
 485       if (interp == LP_INTERP_PERSPECTIVE) {
 486         LLVMValueRef w = bld->attribs[0][3];
 487         assert(attrib != 0);
 488         assert(bld->mask[0] & TGSI_WRITEMASK_W);
 489         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
 490         a = lp_build_mul(coeff_bld, a, oow);
 491       }
 492
 493       break;
 494    case LP_INTERP_CONSTANT:
 495    case LP_INTERP_FACING:
 496       a = lp_build_gather(gallivm, coeff_bld->type.length,
 497                           coeff_bld->type.width, dst_type,
 498                           true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
 499       break;
 500    default:
 501       assert(0);
 502       break;
 503    }
 504    return a;
 505 }
 506
 507 LLVMValueRef
 508 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
 509                     struct gallivm_state *gallivm,
 510                     LLVMValueRef loop_iter,
 511                     LLVMValueRef mask_store,
 512                     unsigned attrib, unsigned chan,
 513                     unsigned loc,
 514                     LLVMValueRef indir_index,
 515                     LLVMValueRef offsets[2])
 516 {
 517    LLVMBuilderRef builder = gallivm->builder;
 518    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 519    struct lp_build_context *setup_bld = &bld->setup_bld;
 520    LLVMValueRef pixoffx;
 521    LLVMValueRef pixoffy;
 522    LLVMValueRef ptr;
 523
 524    /* could do this with code-generated passed in pixel offsets too */
 525
 526    assert(loop_iter);
 527    ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
 528    pixoffx = LLVMBuildLoad(builder, ptr, "");
 529    ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
 530    pixoffy = LLVMBuildLoad(builder, ptr, "");
 531
 532    pixoffx = LLVMBuildFAdd(builder, pixoffx,
 533                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
 534    pixoffy = LLVMBuildFAdd(builder, pixoffy,
 535                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
 536
 537    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);
 538
 539    if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
 540       if (bld->coverage_samples > 1) {
 541          pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
 542          pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
 543       }
 544
 545       if (offsets[0])
 546          pixoffx = LLVMBuildFAdd(builder, pixoffx,
 547                                  offsets[0], "");
 548       if (offsets[1])
 549          pixoffy = LLVMBuildFAdd(builder, pixoffy,
 550                                  offsets[1], "");
 551    } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
 552       LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
 553       LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
 554
 555       LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
 556                                                LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
 557       LLVMValueRef xoffset = lp_build_gather(gallivm,
 558                                              bld->coeff_bld.type.length,
 559                                              bld->coeff_bld.type.width,
 560                                              lp_elem_type(bld->coeff_bld.type),
 561                                              false,
 562                                              base_ptr,
 563                                              x_val_idx, true);
 564       LLVMValueRef yoffset = lp_build_gather(gallivm,
 565                                              bld->coeff_bld.type.length,
 566                                              bld->coeff_bld.type.width,
 567                                              lp_elem_type(bld->coeff_bld.type),
 568                                              false,
 569                                              base_ptr,
 570                                              y_val_idx, true);
 571
 572       if (bld->coverage_samples > 1) {
 573          pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
 574          pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
 575       }
 576    } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
 577       LLVMValueRef centroid_x_offset, centroid_y_offset;
 578
 579       /* for centroid find covered samples for this quad. */
 580       /* if all samples are covered use pixel centers */
 581       if (bld->coverage_samples > 1) {
 582          calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
 583                                pix_center_offset, &centroid_x_offset, &centroid_y_offset);
 584
 585          pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
 586          pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
 587       }
 588    }
 589
 590    // remap attrib properly.
 591    attrib++;
 592
 593    if (indir_index)
 594      return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
 595                                          indir_index, pixoffx, pixoffy);
 596
 597
 598    const unsigned interp = bld->interp[attrib];
 599    LLVMValueRef dadx = coeff_bld->zero;
 600    LLVMValueRef dady = coeff_bld->zero;
 601    LLVMValueRef a = coeff_bld->zero;
 602
 603    LLVMValueRef index = lp_build_const_int32(gallivm, chan);
 604
 605    switch (interp) {
 606    case LP_INTERP_PERSPECTIVE:
 607       /* fall-through */
 608    case LP_INTERP_LINEAR:
 609       dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
 610                                         coeff_bld->type, bld->dadxaos[attrib],
 611                                         index);
 612
 613       dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
 614                                         coeff_bld->type, bld->dadyaos[attrib],
 615                                         index);
 616
 617       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 618                                      coeff_bld->type, bld->a0aos[attrib],
 619                                      index);
 620
 621       /*
 622        * a = a0 + (x * dadx + y * dady)
 623        */
 624       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
 625       a = lp_build_fmuladd(builder, dady, pixoffy, a);
 626
 627       if (interp == LP_INTERP_PERSPECTIVE) {
 628         LLVMValueRef w = bld->attribs[0][3];
 629         assert(attrib != 0);
 630         assert(bld->mask[0] & TGSI_WRITEMASK_W);
 631         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
 632         a = lp_build_mul(coeff_bld, a, oow);
 633       }
 634
 635       break;
 636    case LP_INTERP_CONSTANT:
 637    case LP_INTERP_FACING:
 638       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
 639                                      coeff_bld->type, bld->a0aos[attrib],
 640                                      index);
 641       break;
 642    default:
 643       assert(0);
 644       break;
 645    }
 646    return a;
 647 }
 648
 649 /**
 650  * Generate the position vectors.
 651  *
 652  * Parameter x0, y0 are the integer values with upper left coordinates.
 653  */
 654 static void
 655 pos_init(struct lp_build_interp_soa_context *bld,
 656          LLVMValueRef x0,
 657          LLVMValueRef y0)
 658 {
 659    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
 660    struct lp_build_context *coeff_bld = &bld->coeff_bld;
 661
 662    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
 663    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
 664 }
 665
 666
 667 /**
 668  * Initialize fragment shader input attribute info.
 669  */
 670 void
 671 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 672                          struct gallivm_state *gallivm,
 673                          unsigned num_inputs,
 674                          const struct lp_shader_input *inputs,
 675                          boolean pixel_center_integer,
 676                          unsigned coverage_samples,
 677                          LLVMValueRef sample_pos_array,
 678                          LLVMValueRef num_loop,
 679                          boolean depth_clamp,
 680                          LLVMBuilderRef builder,
 681                          struct lp_type type,
 682                          LLVMValueRef a0_ptr,
 683                          LLVMValueRef dadx_ptr,
 684                          LLVMValueRef dady_ptr,
 685                          LLVMValueRef x0,
 686                          LLVMValueRef y0)
 687 {
 688    struct lp_type coeff_type;
 689    struct lp_type setup_type;
 690    unsigned attrib;
 691    unsigned chan;
 692
 693    memset(bld, 0, sizeof *bld);
 694
 695    memset(&coeff_type, 0, sizeof coeff_type);
 696    coeff_type.floating = TRUE;
 697    coeff_type.sign = TRUE;
 698    coeff_type.width = 32;
 699    coeff_type.length = type.length;
 700
 701    memset(&setup_type, 0, sizeof setup_type);
 702    setup_type.floating = TRUE;
 703    setup_type.sign = TRUE;
 704    setup_type.width = 32;
 705    setup_type.length = TGSI_NUM_CHANNELS;
 706
 707
 708    /* XXX: we don't support interpolating into any other types */
 709    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
 710
 711    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
 712    lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
 713
 714    /* For convenience */
 715    bld->pos = bld->attribs[0];
 716    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
 717
 718    /* Position */
 719    bld->mask[0] = TGSI_WRITEMASK_XYZW;
 720    bld->interp[0] = LP_INTERP_LINEAR;
 721    bld->interp_loc[0] = 0;
 722
 723    /* Inputs */
 724    for (attrib = 0; attrib < num_inputs; ++attrib) {
 725       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
 726       bld->interp[1 + attrib] = inputs[attrib].interp;
 727       bld->interp_loc[1 + attrib] = inputs[attrib].location;
 728    }
 729    bld->num_attribs = 1 + num_inputs;
 730
 731    /* needed for indirect */
 732    bld->a0_ptr = a0_ptr;
 733    bld->dadx_ptr = dadx_ptr;
 734    bld->dady_ptr = dady_ptr;
 735
 736    /* Ensure all masked out input channels have a valid value */
 737    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
 738       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
 739          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
 740       }
 741    }
 742
 743    if (pixel_center_integer) {
 744       bld->pos_offset = 0.0;
 745    } else {
 746       bld->pos_offset = 0.5;
 747    }
 748    bld->depth_clamp = depth_clamp;
 749    bld->coverage_samples = coverage_samples;
 750    bld->num_loop = num_loop;
 751    bld->sample_pos_array = sample_pos_array;
 752
 753    pos_init(bld, x0, y0);
 754
 755    /*
 756     * Simple method (single step interpolation) may be slower if vector length
 757     * is just 4, but the results are different (generally less accurate) with
 758     * the other method, so always use more accurate version.
 759     */
 760    {
 761       /* XXX this should use a global static table */
 762       unsigned i;
 763       unsigned num_loops = 16 / type.length;
 764       LLVMValueRef pixoffx, pixoffy, index;
 765       LLVMValueRef ptr;
 766
 767       bld->xoffset_store = lp_build_array_alloca(gallivm,
 768                                                  lp_build_vec_type(gallivm, type),
 769                                                  lp_build_const_int32(gallivm, num_loops),
 770                                                  "");
 771       bld->yoffset_store = lp_build_array_alloca(gallivm,
 772                                                  lp_build_vec_type(gallivm, type),
 773                                                  lp_build_const_int32(gallivm, num_loops),
 774                                                  "");
 775       for (i = 0; i < num_loops; i++) {
 776          index = lp_build_const_int32(gallivm, i);
 777          calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
 778          ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
 779          LLVMBuildStore(builder, pixoffx, ptr);
 780          ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
 781          LLVMBuildStore(builder, pixoffy, ptr);
 782       }
 783    }
 784    coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
 785 }
 786
 787
 788 /*
 789  * Advance the position and inputs to the given quad within the block.
 790  */
 791
 792 void
 793 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
 794                                       struct gallivm_state *gallivm,
 795                                       LLVMValueRef quad_start_index,
 796                                       LLVMValueRef mask_store,
 797                                       LLVMValueRef sample_id)
 798 {
 799    attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
 800 }
 801
 802 void
 803 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
 804                                    struct gallivm_state *gallivm,
 805                                    LLVMValueRef quad_start_index,
 806                                    LLVMValueRef sample_id)
 807 {
 808    attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
 809 }
 810